LCOV - code coverage report
Current view: top level - util/tmpl - fd_bplus.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 745 767 97.1 %
Date: 2024-11-13 11:58:15 Functions: 54 61 88.5 %

          Line data    Source code
       1             : /* Generate prototypes, inlines and/or implementations for an ultra high
       2             :    performance bplus-tree-based key-val store.  A bplus tree can be
       3             :    persisted beyond the lifetime of creating process, used concurrently,
       4             :    used IPC, relocated in memory, naively serialized/deserialized and/or
       5             :    moved between hosts.  Virtually all operations on a bplus tree are a
       6             :    fast O(lg N) (where N is the number of elements stored) or better in
       7             :    worst case.
       8             : 
       9             :    At its core, this is fast binary search on a sorted array.  But the
      10             :    sorted array has been partitioned into leaves where each leaf is
      11             :    responsible for a continuous disjoint portion of the key space and
      12             :    union of the ranges covered by the leaves covers the entire key
      13             :    space.  The leaves are stored in a tree whose nodes have a large and
      14             :    flexible number of branches per each node that specify how leaves
      15             :    completely partition key space.  Further, to support fast forward and
      16             :    reverse iteration, the leaves are organized into a sorted doubly
      17             :    linked list.  Lastly, the interior nodes and leaves are guaranteed to
      18             :    be full enough that query has a fast O(lg N) worst case and have
      19             :    enough slack that insert / upsert / remove also have fast O(lg N)
      20             :    worst case.
      21             : 
      22             :    This leads to a number of improvements over textbook bplus trees,
      23             :    including:
      24             : 
      25             :    - Removal doesn't require nearly as much reshuffling of the interior
      26             :      nodes.  The only requirement here is that interior nodes form a
      27             :      complete partitioning of the key space.  (There is no requirement
      28             :      that interior nodes store copy of keys found in leaf nodes.)
      29             : 
      30             :    - No extra storage in the node is required for identifying whether or
      31             :      not an interior node or a leaf.
      32             : 
      33             :    - The leaf pair max radix can be independently tuned from the
      34             :      interior node pair radix (especially useful if sizing interior
      35             :      nodes and leaves to match things like memory page sizes).
      36             : 
      37             :    - Supports fast reverse iteration.
      38             : 
      39             :    Typical usage:
      40             : 
      41             :      struct mypair {
      42             :        mykey_t key;
      43             : 
      44             :        ... key can be located arbitrarily in struct (and renamed if
      45             :        ... needed ... see BPLUS_PAIR_KEY).  It is managed by the bplus
      46             :        ... tree and should not be modified.
      47             : 
      48             :        ... IMPORTANT SAFETY TIP!  The location of a pair can be changed
      49             :        ... by insert / upsert / remove operations.
      50             : 
      51             :      };
      52             : 
      53             :      typedef struct mypair mypair_t;
      54             : 
      55             :      #define BPLUS_NAME         mybplus
      56             :      #define BPLUS_KEY_T        mykey_t
      57             :      #define BPLUS_PAIR_T       mypair_t
      58             :      #define BPLUS_KEY_CMP(a,b) mykeycmp( (a), (b) )
      59             :      #include "fd_bplus.c"
      60             : 
      61             :    will provide the following APIs as a header only style library in the
      62             :    compilation unit:
      63             : 
      64             :      // A mybplus_t is an opaque handle to a join to a bplus tree
      65             : 
      66             :      struct mybplus_private;
      67             :      typedef struct mybplus_private mybplus_t;
      68             : 
      69             :      // A mybplus_iter_t is an opaque handle to a bplus tree iterator
      70             : 
      71             :      struct mybplus_private_iter;
      72             :      typedef struct mybplus_private_iter mybplus_iter_t;
      73             : 
      74             :      // Constructors
      75             : 
      76             :      // mybplus_{leaf,node}_max_est returns a conservative estimate of
      77             :      // the number of {leaves,nodes} needed for a worst case bplus tree
      78             :      // containing ele_max_est elements.
      79             : 
      80             :      ulong mybplus_leaf_max_est( ulong ele_max_est );
      81             :      ulong mybplus_node_max_est( ulong ele_max_est );
      82             : 
      83             :      // mybplus_{align,footprint,new,join,leave,delete} have the usual
      84             :      // persistent IPC object constructors / destructors semantics.
      85             : 
      86             :      ulong mybplus_align    ( void );
      87             :      ulong mybplus_footprint( ulong node_max, ulong leaf_max );
      88             : 
      89             :      void      * mybplus_new   ( void * shmem, ulong node_max, ulong leaf_max );
      90             :      mybplus_t * mybplus_join  ( void * shbplus );
      91             :      void      * mybplus_leave ( mybplus_t * join );
      92             :      void      * mybplus_delete( void * shbplus );
      93             : 
      94             :      // Accessors
      95             : 
      96             :      // mybplus_{node,leaf}_max return the {node,leaf}_max values used
      97             :      // to construct the bplus tree.  Assumes join is a current local
      98             :      // join.  Fast O(1) worst case.
      99             : 
     100             :      ulong mybplus_node_max( mybplus_t const * join );
     101             :      ulong mybplus_leaf_max( mybplus_t const * join );
     102             : 
     103             :      // mybplus_is_empty returns 1 if the bplus tree contains no pairs
     104             :      // and 0 otherwise.  Assumes join is a current local join.  Fast
     105             :      // O(1) worst case.
     106             : 
     107             :      int mybplus_is_empty( mybplus_t const * join );
     108             : 
     109             :      // mybplus_{min,max} return the pointer in the caller's local
     110             :      // address space to the pair in the bplus tree with the {min,max}
     111             :      // key.  Assumes join is a current local join and bplus tree is not
     112             :      // empty.  The lifetime of the returned pointer is the lesser of
     113             :      // the lifetime of the local join or the next insert / upsert /
     114             :      // remove operation on the bplus tree.  The bplus tree retains
     115             :      // ownership of the returned pair and the caller should not modify
     116             :      // the pair key field.  Fast O(1) worst case.
     117             :      //
     118             :      // mybplus_{min,max}_const is a const-correct version.
     119             : 
     120             :      mypair_t const * mybplus_min_const( mybplus_t const * join );
     121             :      mypair_t const * mybplus_max_const( mybplus_t const * join );
     122             :      mypair_t       * mybplus_min      ( mybplus_t       * join );
     123             :      mypair_t       * mybplus_max      ( mybplus_t       * join );
     124             : 
     125             :      // mybplus_query returns the pointer in the caller's local address
     126             :      // space to the pair in the bplus tree that matches the key pointed
     127             :      // to by query or NULL if there is no key matching query in the
     128             :      // bplus tree.  Assumes join is a current local join.  The lifetime
     129             :      // of the returned pointer is the lesser of the lifetime of the
     130             :      // local join or the next insert / upsert / remove operation on the
     131             :      // bplus tree.  The bplus tree retains ownership of the returned
     132             :      // pair and the caller should not modify the key field.  The bplus
     133             :      // tree has no interest in query in return.  Fast O(lg N) worst
     134             :      // case.
     135             :      //
     136             :      // mybplus_query_const is a const-correct version.
     137             : 
     138             :      mypair_t const * mybplus_query_const( mybplus_t const * join, mykey_t const * query );
     139             :      mypair_t *       mybplus_query(       mybplus_t       * join, mykey_t const * query );
     140             : 
     141             :      // Operations
     142             : 
     143             :      // mybplus_insert inserts a key into the bplus tree.  Assumes join
     144             :      // is a current local join and key points in the caller's address
     145             :      // space to the key to insert.  The bplus tree has no interest in
     146             :      // key in return.
     147             :      //
     148             :      // On success, returns the location in the caller's address space
     149             :      // where key was inserted.  The lifetime of the returned pointer is
     150             :      // the lesser of the lifetime of the local join or the next
     151             :      // insert / upsert / remove operation on the bplus tree.  The
     152             :      // caller should not modify the pair key field but is free to
     153             :      // modify all the other values.
     154             :      //
     155             :      // On failure, returns NULL.  Reasons for failure are the key was
     156             :      // already in the tree (locations of pairs might have changed),
     157             :      // there were not enough nodes (locations of pairs did not change)
     158             :      // or there were not enough leaves available to complete the insert
     159             :      // (locations of pairs did not change).
     160             :      //
     161             :      // mybplus_upsert is nearly equivalent to:
     162             :      //
     163             :      //   int insert;
     164             :      //   mypair_t *    pair = mybplus_query ( join, key ); insert = 0;
     165             :      //   if( !pair ) { pair = mybplus_insert( join, key ); insert = 1; }
     166             :      //   if( pair && _opt_insert ) *_opt_insert = insert;
     167             :      //
     168             :      // but potentially faster as it only traverses the bplus tree once.
     169             :      // The "nearly" qualifier is that, unlike the above snippet, the
     170             :      // upsert might change the location of keys even if key is already
     171             :      // in the bplus tree.  Fast O(lg N) worst case.
     172             : 
     173             :      mypair_t * mybplus_insert( mybplus_t * join, mykey_t const * key );
     174             :      mypair_t * mybplus_upsert( mybplus_t * join, mykey_t const * key, int * _opt_insert );
     175             : 
     176             :      // mybplus_remove_key removes a key from the bplus tree.  Assumes
     177             :      // join is a current local join and key points in the caller's
     178             :      // address space to the key to remove.  Returns 0 on success and -1
     179             :      // if the key was not found in the tree.  The bplus tree has no
     180             :      // interest in key in return.  Fast O(lg N) worst case.
     181             : 
     182             :      int mybplus_remove_key( mybplus_t * join, mykey_t const * key );
     183             : 
     184             :      // mybplus_remove removes the pair pointed to by pair from the
     185             :      // bplus tree.  Assumes join is a current local join and pair is a
     186             :      // pointer in the caller's local address space to a pair that is
     187             :      // currently in the bplus tree.  The pair is no longer in the bplus
     188             :      // tree on return.  Fast O(lg N) worst case.
     189             : 
     190             :      void mybplus_remove( mybplus_t * join, mypair_t * pair );
     191             : 
     192             :      // mybplus_flush removes all pairs from the bplus tree.  Assumes
     193             :      // join is a current local join.  There are no pairs in the bplus
     194             :      // tree on return.  Fast O( node_max + leaf_max ) worst case.
     195             : 
     196             :      void mybplus_flush( mybplus_t * join );
     197             : 
     198             :      // mybplus_verify validates the bplus tree pointed by join.
     199             :      // Returns 0 on success and -1 on failure (logs details).
     200             :      // O(node_max+leaf_max) worst case.
     201             : 
     202             :      int mybplus_verify( mybplus_t const * join );
     203             : 
     204             :      // Iteration
     205             : 
     206             :      // mybplus_iter_nul returns an iterator positioned at nul.  Fast
     207             :      // O(1) worst case.
     208             :      //
     209             :      // mybplus_iter_min returns an iterator positioned at the min pair
     210             :      // or nul if the bplus is empty.  Fast O(1) worst case.
     211             :      //
     212             :      // mybplus_iter_max returns an iterator positioned at the max pair
     213             :      // or nul if the bplus is empty.  Fast O(1) worst case.
     214             :      //
     215             :      // mybplus_iter_ge returns an iterator positioned at the first pair
     216             :      // greater than or equal to query or at nul if all keys are less
     217             :      // than query.  query==NULL is equivalent to "+inf".  Fast O(lg N)
     218             :      // worst case.
     219             :      //
     220             :      // mybplus_iter_gt returns an iterator positioned at the first pair
     221             :      // greater than query or at nul if all keys are less than or equal
     222             :      // to query.  query==NULL is equivalent to "+inf".  Fast O(lg N)
     223             :      // worst case.
     224             :      //
     225             :      // mybplus_iter_le returns an iterator positioned at the last pair
     226             :      // less than or equal to query or at nul if all keys are greater
     227             :      // than query.  query==NULL is equivalent to "-inf".  Fast O(lg N)
     228             :      // worst case.
     229             :      //
     230             :      // mybplus_iter_lt returns an iterator positioned at the last pair
     231             :      // less than query or at nul if all keys are greater than or
     232             :      // equal to query.  query==NULL is equivalent to "-inf".  Fast
     233             :      // O(lg N) worst case.
     234             :      //
     235             :      // mybplus_iter_next returns an iterator positioned at the next
     236             :      // pair or nul if the iterator is currently positioned at last
     237             :      // pair.  Fast O(1) worst case.
     238             :      //
     239             :      // mybplus_iter_prev returns an iterator positioned at the previous
     240             :      // pair or nul if the iterator is currently positioned at first
     241             :      // pair.  Fast O(1) worst case.
     242             :      //
     243             :      // mybplus_iter_eq returns true if iter is positioned at the same
     244             :      // place fini is positioned.  Fast O(1) worst case.
     245             :      //
     246             :      // mybplus_iter_pair returns a pair associated with the current
     247             :      // iteration position.  mybplus_iter_pair_const is a const correct
     248             :      // version.  Fast O(1) worst case.
     249             :      //
     250             :      // Assumes join is a current local join and query points to a valid
     251             :      // key in the caller's local address space.  Retains no interest in
     252             :      // query on return.
     253             :      //
     254             :      // Example: iterate over all pairs in ascending order:
     255             :      //
     256             :      //   for( mybplus_iter_t iter = mybplus_iter_min( bplus );
     257             :      //        !mybplus_iter_eq_nul( bplus, iter );
     258             :      //        iter = mybplus_iter_next( bplus, iter ) ) {
     259             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     260             :      //     ... process pair here
     261             :      //     ... do not insert, upsert or remove keys from bplus here
     262             :      //     ... do not modify key of pair here
     263             :      //   }
     264             :      //
     265             :      // Example: iterate over all pairs in descending order:
     266             :      //
     267             :      //   for( mybplus_iter_t iter = mybplus_iter_max( bplus );
     268             :      //        !mybplus_iter_eq_nul( bplus, iter );
     269             :      //        iter = mybplus_iter_prev( bplus, iter ) ) {
     270             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     271             :      //     ... process pair here
     272             :      //     ... do not insert, upsert or remove keys from bplus here
     273             :      //     ... do not modify key of pair here
     274             :      //   }
     275             :      //
     276             :      // Example: iterate over all pairs with keys in [key0,key1) in
     277             :      // ascending order (assumes key1>=key0):
     278             :      //
     279             :      //   mybplus_iter_t iter = mybplus_iter_ge( bplus, key0 );
     280             :      //   mybplus_iter_t fini = mybplus_iter_ge( bplus, key1 ); // key1==NULL will iterate over all pairs with keys >= key0
     281             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     282             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     283             :      //
     284             :      //     ... process pair here
     285             :      //     ... do not insert, upsert or remove keys from bplus here
     286             :      //     ... do not modify key of pair here
     287             :      //
     288             :      //     iter = mybplus_iter_next( bplus, iter );
     289             :      //   }
     290             :      //
     291             :      // Example: iterate over all pairs with keys in [key0,key1] in
     292             :      // ascending order (assumes key1>=key0):
     293             :      //
     294             :      //   mybplus_iter_t iter = mybplus_iter_ge( bplus, key0 );
     295             :      //   mybplus_iter_t fini = mybplus_iter_gt( bplus, key1 ); // key1==NULL will iterate over all pairs with keys >= key0
     296             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     297             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     298             :      //
     299             :      //     ... process pair here
     300             :      //     ... do not insert, upsert or remove keys from bplus here
     301             :      //     ... do not modify key of pair here
     302             :      //
     303             :      //     iter = mybplus_iter_next( bplus, iter );
     304             :      //   }
     305             :      //
     306             :      // Example: iterate over all pairs with keys in (key0,key1) in
     307             :      // ascending order (assumes key1>=key0):
     308             :      //
     309             :      //   mybplus_iter_t iter = mybplus_iter_gt( bplus, key0 );
     310             :      //   mybplus_iter_t fini = mybplus_iter_ge( bplus, key1 ); // key1==NULL will iterate over all pairs with keys > key0
     311             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     312             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     313             :      //
     314             :      //     ... process pair here
     315             :      //     ... do not insert, upsert or remove keys from bplus here
     316             :      //     ... do not modify key of pair here
     317             :      //
     318             :      //     iter = mybplus_iter_next( bplus, iter );
     319             :      //   }
     320             :      //
     321             :      // Example: iterate over all pairs with keys in (key0,key1] in
     322             :      // ascending order (assumes key1>=key0):
     323             :      //
     324             :      //   mybplus_iter_t iter = mybplus_iter_gt( bplus, key0 );
     325             :      //   mybplus_iter_t fini = mybplus_iter_gt( bplus, key1 ); // key1==NULL will iterate over all pairs with keys > key0
     326             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     327             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     328             :      //
     329             :      //     ... process pair here
     330             :      //     ... do not insert, upsert or remove keys from bplus here
     331             :      //     ... do not modify key of pair here
     332             :      //
     333             :      //     iter = mybplus_iter_next( bplus, iter );
     334             :      //   }
     335             :      //
     336             :      // Example: iterate over all pairs with keys in [key0,key1) in
     337             :      // descending order (assumes key1>=key0):
     338             :      //
     339             :      //   mybplus_iter_t iter = mybplus_iter_lt( bplus, key1 );
     340             :      //   mybplus_iter_t fini = mybplus_iter_lt( bplus, key0 ); // key0==NULL will iterate over all pairs with keys < key1
     341             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     342             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     343             :      //
     344             :      //     ... process pair here
     345             :      //     ... do not insert, upsert or remove keys from bplus here
     346             :      //     ... do not modify key of pair here
     347             :      //
     348             :      //     iter = mybplus_iter_prev( bplus, iter );
     349             :      //   }
     350             :      //
     351             :      // Example: iterate over all pairs with keys in [key0,key1] in
     352             :      // descending order (assumes key1>=key0):
     353             :      //
     354             :      //   mybplus_iter_t iter = mybplus_iter_le( bplus, key1 );
     355             :      //   mybplus_iter_t fini = mybplus_iter_lt( bplus, key0 ); // key0==NULL will iterate over all pairs with keys <= key1
     356             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     357             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     358             :      //
     359             :      //     ... process pair here
     360             :      //     ... do not insert, upsert or remove keys from bplus here
     361             :      //     ... do not modify key of pair here
     362             :      //
     363             :      //     iter = mybplus_iter_prev( bplus, iter );
     364             :      //   }
     365             :      //
     366             :      // Example: iterate over all pairs with keys in (key0,key1) in
     367             :      // descending order (assumes key1>=key0):
     368             :      //
     369             :      //   mybplus_iter_t iter = mybplus_iter_lt( bplus, key1 );
     370             :      //   mybplus_iter_t fini = mybplus_iter_le( bplus, key0 ); // key0==NULL will iterate over all pairs with keys < key1
     371             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     372             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     373             :      //
     374             :      //     ... process pair here
     375             :      //     ... do not insert, upsert or remove keys from bplus here
     376             :      //     ... do not modify key of pair here
     377             :      //
     378             :      //     iter = mybplus_iter_prev( bplus, iter );
     379             :      //   }
     380             :      //
     381             :      // Example: iterate over all pairs with keys in (key0,key1] in
     382             :      // descending order (assumes key1>=key0):
     383             :      //
     384             :      //   mybplus_iter_t iter = mybplus_iter_le( bplus, key1 );
     385             :      //   mybplus_iter_t fini = mybplus_iter_le( bplus, key0 ); // key0==NULL will iterate over all pairs with keys <= key1
     386             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     387             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     388             :      //
     389             :      //     ... process pair here
     390             :      //     ... do not insert, upsert or remove keys from bplus here
     391             :      //     ... do not modify key of pair here
     392             :      //
     393             :      //     iter = mybplus_iter_prev( bplus, iter );
     394             :      //   }
     395             : 
     396             :      mybplus_iter_t mybplus_iter_nul( mybplus_t const * join );
     397             :      mybplus_iter_t mybplus_iter_min( mybplus_t const * join );
     398             :      mybplus_iter_t mybplus_iter_max( mybplus_t const * join );
     399             : 
     400             :      mybplus_iter_t mybplus_iter_ge( mybplus_t const * join, mykey_t const * query );
     401             :      mybplus_iter_t mybplus_iter_gt( mybplus_t const * join, mykey_t const * query );
     402             :      mybplus_iter_t mybplus_iter_le( mybplus_t const * join, mykey_t const * query );
     403             :      mybplus_iter_t mybplus_iter_lt( mybplus_t const * join, mykey_t const * query );
     404             : 
     405             :      int mybplus_iter_eq    ( mybplus_t const * join, mybplus_iter_t i0, mybplus_iter_t i1 );
     406             :      int mybplus_iter_eq_nul( mybplus_t const * join, mybplus_iter_t iter );
     407             : 
     408             :      mybplus_iter_t mybplus_iter_next( mybplus_t const * join, mybplus_iter_t iter );
     409             :      mybplus_iter_t mybplus_iter_prev( mybplus_t const * join, mybplus_iter_t iter );
     410             : 
     411             :      mypair_t const * mybplus_iter_pair_const( mybplus_t const * join, mybplus_iter_t iter );
     412             :      mypair_t       * mybplus_iter_pair      ( mybplus_t *       join, mybplus_iter_t iter );
     413             : 
     414             :    You can do this as often as you like in a compilation unit to get
     415             :    different types of bplus trees.  Variants exist for making header
     416             :    prototypes only and/or implementations if doing a library with
     417             :    multiple compilation units.  Further, options exist to use different
     418             :    hashing functions, comparison functions, etc as detailed below. */
     419             : 
     420             : /* BPLUS_NAME gives the API prefix (e.g. mybplus).  Required. */
     421             : 
     422             : #ifndef BPLUS_NAME
     423             : #error "Define BPLUS_NAME"
     424             : #endif
     425             : 
     426             : /* BPLUS_KEY_T gives the key type.  Should be a plain-old-data type with
     427             :    a total order.  Required. */
     428             : 
     429             : #ifndef BPLUS_KEY_T
     430             : #error "Define BPLUS_KEY_T"
     431             : #endif
     432             : 
     433             : /* BPLUS_PAIR_T gives the pair type.  Should be a structure of the form:
     434             : 
     435             :      typedef struct BPLUS_PAIR {
     436             :        BPLUS_KEY_T key; // Can be arbitrarily placed in structure, should not be modified by the user
     437             :        ... arbitrary user fields
     438             :      } BPLUS_PAIR_T;
     439             : 
     440             :    (Or the appropriate field name given BPLUS_PAIR_KEY below.)  Required. */
     441             : 
     442             : #ifndef BPLUS_PAIR_T
     443             : #error "Define BPLUS_PAIR_T"
     444             : #endif
     445             : 
     446             : /* BPLUS_PAIR_KEY gives the name of the key field in BPLUS_PAIR_T.
     447             :    Defaults to key. */
     448             : 
     449             : #ifndef BPLUS_PAIR_KEY
     450             : #define BPLUS_PAIR_KEY key
     451             : #endif
     452             : 
     453             : /* BPLUS_KEY_CMP compares the keys pointed to by a and b and returns
     454             :    {<0,0,>0} if *a is {less than,equal to,greater than} *b.  a and b
     455             :    will be valid pointers to keys.  Defaults to a bytewise memcmp of the keys. */
     456             : 
     457             : #ifndef BPLUS_KEY_CMP
     458             : #define BPLUS_KEY_CMP(a,b) memcmp( (a), (b), sizeof(*(a)) )
     459             : #endif
     460             : 
     461             : /* BPLUS_TREE_MAX is the maximum number of children a non-leaf node can
     462             :    have.  Must be even, >=4 and much smaller than ULONG_MAX.  Defaults to 128. */
     463             : 
     464             : #ifndef BPLUS_TREE_MAX
     465             : #define BPLUS_TREE_MAX 128
     466             : #endif
     467             : 
     468             : /* BPLUS_PAIR_MAX is the maximum number of pairs a leaf node can
     469             :    hold.  Must be even, >=4 and much smaller than ULONG_MAX.  Defaults to 128. */
     470             : 
     471             : #ifndef BPLUS_PAIR_MAX
     472             : #define BPLUS_PAIR_MAX 128
     473             : #endif
     474             : 
     475             : /* BPLUS_ALIGN gives the default alignment of the BPLUS region.  Should
     476             :    be a positive integer power of 2.  Defaults to 128. */
     477             : 
     478             : #ifndef BPLUS_ALIGN
     479           9 : #define BPLUS_ALIGN 128
     480             : #endif
     481             : 
     482             : /* BPLUS_NODE_ALIGN gives the default alignment of an interior node.
     483             :    Should be a positive integer power of 2 no larger than BPLUS_ALIGN.
     484             :    Defaults to 128. */
     485             : 
     486             : #ifndef BPLUS_NODE_ALIGN
     487   587278872 : #define BPLUS_NODE_ALIGN 128
     488             : #endif
     489             : 
     490             : /* BPLUS_LEAF_ALIGN gives the default alignment of a leaf node.  Should
     491             :    be a positive integer power of 2 no larger than BPLUS_ALIGN.  Defaults
     492             :    to 128. */
     493             : 
     494             : #ifndef BPLUS_LEAF_ALIGN
     495   587278872 : #define BPLUS_LEAF_ALIGN 128
     496             : #endif
     497             : 
     498             : /* BPLUS_MAGIC is the structure magic number to use to aid in persistent
     499             :    and/or IPC usage. */
     500             : 
     501             : #ifndef BPLUS_MAGIC
     502           3 : #define BPLUS_MAGIC (0xfdb91c53a61c0000UL) /* FD BPLUS MAGIC 0000 */
     503             : #endif
     504             : 
     505             : /* BPLUS_IMPL_STYLE indicates what this generator should output:
     506             :      0 - static implementation (default)
     507             :      1 - library header
     508             :      2 - library implementation */
     509             : 
     510             : #ifndef BPLUS_IMPL_STYLE
     511             : #define BPLUS_IMPL_STYLE 0
     512             : #endif
     513             : 
     514             : /**********************************************************************/
     515             : 
     516 20959878411 : #define BPLUS_(name)FD_EXPAND_THEN_CONCAT3(BPLUS_NAME,_,name)
     517             : 
     518             : #if BPLUS_IMPL_STYLE==0
     519             : #define BPLUS_STATIC FD_FN_UNUSED static
     520             : #else
     521             : #define BPLUS_STATIC
     522             : #endif
     523             : 
     524             : #if BPLUS_IMPL_STYLE==0 || BPLUS_IMPL_STYLE==1
     525             : 
     526             : /* Header *************************************************************/
     527             : 
     528             : #include "../log/fd_log.h"
     529             : 
     530             : struct BPLUS_(private);
     531             : typedef struct BPLUS_(private) BPLUS_(t);
     532             : 
     533             : struct BPLUS_(private_iter);
     534             : typedef struct BPLUS_(private_iter) BPLUS_(iter_t);
     535             : 
     536             : /* Internal use only */
     537             : 
     538             : /* A bplus_private_node_t is used for finding leaves that might contain
     539             :    an element fast. */
     540             : 
     541             : struct __attribute__((aligned(BPLUS_NODE_ALIGN))) BPLUS_(private_node) {
     542             : 
     543             :   /* This point is BPLUS_NODE_ALIGN aligned */
     544             : 
     545             :   ulong       tree_cnt;                     /* if acquired, in [0,BPLUS_TREE_MAX],  else ignored */
     546             :   ulong       tree_off[ BPLUS_TREE_MAX   ]; /* if acquired, indexed [0,tree_cnt),   else
     547             :                                                tree_off[0]==node pool next offset (0 if last node in pool) */
     548             :   BPLUS_KEY_T pivot   [ BPLUS_TREE_MAX-1 ]; /* if acquired, indexed [0,tree_cnt-1), else ignored */
     549             : 
     550             :   /* tree i handles keys in [ pivot[i-1], pivot[i] ), pivot[-1] /
     551             :      pivot[tree_cnt-1] are implied to be the previous / next pivot in an
     552             :      in-order traversal of the bplus tree node pivots (or -/+inf if
     553             :      leftmost/rightmost). */
     554             : };
     555             : 
     556             : typedef struct BPLUS_(private_node) BPLUS_(private_node_t);
     557             : 
     558             : /* A bplus_private_leaf_t holds up to BPLUS_PAIR_MAX pairs of the
     559             :    tree in sorted order. */
     560             : 
     561             : struct __attribute__((aligned(BPLUS_LEAF_ALIGN))) BPLUS_(private_leaf) {
     562             : 
     563             :   /* This point is BPLUS_LEAF_ALIGN aligned */
     564             : 
     565             :   ulong        pair_cnt;               /* if acquired, in [0,BPLUS_PAIR_MAX],                                    else ignored */
     566             :   ulong        prev_off;               /* if acquired, prev leaf offset (or 0 if first leaf),                    else ignored */
     567             :   ulong        next_off;               /* if acquired, next leaf offset (or 0 if last  leaf),
     568             :                                           else leaf pool next offset (0 if last node in pool) */
     569             :   BPLUS_PAIR_T pair[ BPLUS_PAIR_MAX ]; /* if acquired, indexed [0,pair_cnt), unique keys and in ascending order, else ignored */
     570             : };
     571             : 
     572             : typedef struct BPLUS_(private_leaf) BPLUS_(private_leaf_t);
     573             : 
     574             : /* A bplus_private_t is a contiguous region of memory that holds a
     575             :    bplus tree.  Important invariants:
     576             : 
     577             :    - Empty trees have no root.
     578             :    - If root is a leaf, it has [1,pair_max] pairs.
     579             :    - If root is a node, it has [2,tree_max] trees.
     580             :    - Non-root nodes  have [tree_min,tree_max] trees.
     581             :    - Non-root leaves have [pair_min,pair_max] pairs.
     582             :    - Children of a node are not a mix of nodes and leaves. */
     583             : 
     584             : struct __attribute__((aligned(BPLUS_ALIGN))) BPLUS_(private) {
     585             : 
     586             :   /* This point is aligned BPLUS_ALIGN */
     587             : 
     588             :   ulong magic;                              /* ==BPLUS_MAGIC */
     589             :   ulong node_max;      ulong leaf_max;      /* maximum number of node/leaf in the store */
     590             :   ulong node_lo;       ulong leaf_lo;       /* offset from the first byte of bplus header to the node/leaf storage */
     591             :   ulong node_pool_off; ulong leaf_pool_off; /* first node/leaf in node/leaf pool, 0 if no node/leaf in pool */
     592             :   ulong root_off;                           /* offset of node/leaf to tree root (or 0 if empty) */
     593             :   ulong leaf_min_off;                       /* offset of leaf with minimum pair (or 0 if empty) */
     594             :   ulong leaf_max_off;                       /* offset of leaf with maximum pair (or 0 if empty) */
     595             : 
     596             :   /* padding to BPLUS_NODE_ALIGN here */
     597             :   /* node_lo points here, node_max elements, indexed [0,node_max) */
     598             :   /* padding to BPLUS_LEAF_ALIGN here */
     599             :   /* leaf_lo points here, leaf_max elements, indexed [0,leaf_max) */
     600             :   /* padding to BPLUS_ALIGN here */
     601             : 
     602             : };
     603             : 
     604             : typedef struct BPLUS_(private) BPLUS_(private_t);
     605             : 
     606             : struct BPLUS_(private_iter) {
     607             :   ulong leaf_off; /* offset to current leaf */
     608             :   ulong pair_idx; /* current pair in current leaf */
     609             : };
     610             : 
     611             : FD_PROTOTYPES_BEGIN
     612             : 
     613             : /* bplus_private_{pair,tree}_{min,max} return the corresponding
     614             :    configuration values for this bplus implementation. */
     615             : 
     616           0 : FD_FN_CONST static inline ulong BPLUS_(private_pair_min)( void ) { return (ulong)(BPLUS_PAIR_MAX/2); } /* exact */
     617           0 : FD_FN_CONST static inline ulong BPLUS_(private_pair_max)( void ) { return (ulong) BPLUS_PAIR_MAX;    }
     618             : 
     619           0 : FD_FN_CONST static inline ulong BPLUS_(private_tree_min)( void ) { return (ulong)(BPLUS_TREE_MAX/2); } /* exact */
     620           0 : FD_FN_CONST static inline ulong BPLUS_(private_tree_max)( void ) { return (ulong) BPLUS_TREE_MAX;    }
     621             : 
     622             : /* bplus_private_{node,leaf}_max_max return a value for {node,leaf}_max
     623             :    such that the {node,leaf} storage of the bplus tree will require at
     624             :    most 2^62 bytes. */
     625             : 
     626             : FD_FN_CONST static inline ulong
     627           0 : BPLUS_(private_node_max_max)( void ) {
     628           0 :   return ((1UL<<62)-BPLUS_NODE_ALIGN+1UL) / sizeof( BPLUS_(private_node_t));
     629           0 : }
     630             : 
     631             : FD_FN_CONST static inline ulong
     632           0 : BPLUS_(private_leaf_max_max)( void ) {
     633           0 :   return ((1UL<<62)-BPLUS_LEAF_ALIGN+1UL) / sizeof( BPLUS_(private_leaf_t));
     634           0 : }
     635             : 
     636             : /* bplus_private_key_cmp gives BPLUS_KEY_CMP the exact function
     637             :    signature used by the below implementations. */
     638             : 
     639             : FD_FN_PURE static inline int
     640             : BPLUS_(private_key_cmp)( BPLUS_KEY_T const * a,
     641  9880452882 :                          BPLUS_KEY_T const * b ) {
     642  9880452882 :   return BPLUS_KEY_CMP(a,b);
     643  9880452882 : }
     644             : 
     645             : /* bplus_private_is_leaf returns 1 if the root of the tree at bplus
     646             :    global offset tree_off is a leaf or 0 if it is a node.  leaf_lo is
     647             :    the bplus global offset of the leaf preallocated storage.  Assumes
     648             :    tree_off and leaf_lo are valid. */
     649             : 
     650  4640923854 : FD_FN_CONST static inline int BPLUS_(private_is_leaf)( ulong tree_off, ulong leaf_lo ) { return tree_off>=leaf_lo; }
     651             : 
     652             : /* bplus_private returns location of the bplus private metadata in the
     653             :    caller's address space given a valid local join.  Lifetime of the
     654             :    returned pointer is the lifetime of the join.  bplus_private_const is
     655             :    a const correct version. */
     656             : 
     657             : FD_FN_CONST static inline BPLUS_(private_t) *
     658    16917414 : BPLUS_(private)( BPLUS_(t) * join ) {
     659    16917414 :   return (BPLUS_(private_t) *)join;
     660    16917414 : }
     661             : 
     662             : FD_FN_CONST static inline BPLUS_(private_t) const *
     663    80622819 : BPLUS_(private_const)( BPLUS_(t) const * join ) {
     664    80622819 :   return (BPLUS_(private_t) const *)join;
     665    80622819 : }
     666             : 
     667             : /* bplus_private_{node,leaf} return the pointer in the caller's local
     668             :    address space of the {node,leaf} located at bplus global
     669             :    {node,leaf}_off.  The lifetime of the returned pointer is the
     670             :    lifetime of the local join.  Assumes bplus and {node,leaf}_off are valid. */
     671             : 
     672             : FD_FN_CONST static inline BPLUS_(private_node_t) *
     673             : BPLUS_(private_node)( BPLUS_(private_t) * bplus,
     674    71810601 :                       ulong               node_off ) {
     675    71810601 :   return (BPLUS_(private_node_t) *)((ulong)bplus + node_off);
     676    71810601 : }
     677             : 
     678             : FD_FN_CONST static inline BPLUS_(private_leaf_t) *
     679             : BPLUS_(private_leaf)( BPLUS_(private_t) * bplus,
     680    23130888 :                       ulong               leaf_off ) {
     681    23130888 :   return (BPLUS_(private_leaf_t) *)((ulong)bplus + leaf_off);
     682    23130888 : }
     683             : 
     684             : FD_FN_CONST static inline BPLUS_(private_node_t) const *
     685             : BPLUS_(private_node_const)( BPLUS_(private_t) const * bplus,
     686  4264239753 :                             ulong                     node_off ) {
     687  4264239753 :   return (BPLUS_(private_node_t) const *)((ulong)bplus + node_off);
     688  4264239753 : }
     689             : 
     690             : FD_FN_CONST static inline BPLUS_(private_leaf_t) const *
     691             : BPLUS_(private_leaf_const)( BPLUS_(private_t) const * bplus,
     692  7090158669 :                             ulong                     leaf_off ) {
     693  7090158669 :   return (BPLUS_(private_leaf_t) const *)((ulong)bplus + leaf_off);
     694  7090158669 : }
     695             : 
     696             : /* bplus_private_off returns the bplus global offset for the given
     697             :    address in the caller's address space.  Assumes bplus is valid and
     698             :    addr is non-NULL and into the bplus memory region. */
     699             : 
     700             : FD_FN_CONST static inline ulong
     701             : BPLUS_(private_off)( BPLUS_(private_t) const * bplus,
     702     3380268 :                      void const *              addr ) {
     703     3380268 :   return (ulong)addr - (ulong)bplus;
     704     3380268 : }
     705             : 
     706             : /* bplus_private_node_acquire acquires a node from the bplus's node pool
     707             :    and returns a pointer to it in the caller's address space.  Assumes
     708             :    bplus is valid.  Returns NULL if bplus node pool is empty.
     709             : 
     710             :    bplus_private_node_release releases a node to the bplus's node pool.
     711             :    Assumes bplus is valid, node is valid and node is not currently in
     712             :    the pool.
     713             : 
     714             :    Similarly for bplus_private_leaf_{acquire,release}. */
     715             : 
     716             : static inline BPLUS_(private_node_t) *
     717      232035 : BPLUS_(private_node_acquire)( BPLUS_(private_t) * bplus ) {
     718      232035 :   ulong node_off = bplus->node_pool_off;
     719      232035 :   if( FD_UNLIKELY( !node_off ) ) return NULL; /* node pool is empty */
     720      232035 :   BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, node_off );
     721      232035 :   bplus->node_pool_off = node->tree_off[0]; /* pop: a pooled node keeps the next pool offset in tree_off[0] */
     722      232035 :   return node;
     723      232035 : }
     724             : 
     725             : static inline void
     726             : BPLUS_(private_node_release)( BPLUS_(private_t)      * bplus,
     727      236961 :                               BPLUS_(private_node_t) * node ) {
     728      236961 :   node->tree_off[0]    = bplus->node_pool_off;
     729      236961 :   bplus->node_pool_off = BPLUS_(private_off)( bplus, node );
     730      236961 : }
     731             : 
     732             : static inline BPLUS_(private_leaf_t) *
     733      967224 : BPLUS_(private_leaf_acquire)( BPLUS_(private_t) * bplus ) {
     734      967224 :   ulong leaf_off = bplus->leaf_pool_off;
     735      967224 :   if( FD_UNLIKELY( !leaf_off ) ) return NULL; /* leaf pool is empty */
     736      967221 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, leaf_off );
     737      967221 :   bplus->leaf_pool_off = leaf->next_off; /* pop: a pooled leaf keeps the next pool offset in next_off */
     738      967221 :   return leaf;
     739      967224 : }
     740             : 
     741             : static inline void
     742             : BPLUS_(private_leaf_release)( BPLUS_(private_t)      * bplus,
     743      976146 :                               BPLUS_(private_leaf_t) * leaf ) {
     744      976146 :   leaf->next_off       = bplus->leaf_pool_off;
     745      976146 :   bplus->leaf_pool_off = BPLUS_(private_off)( bplus, leaf );
     746      976146 : }
     747             : 
     748             : /* bplus_private_insert inserts or upserts a key into a bplus tree.
     749             :    Assumes join is a current local join and key points to a valid key in
     750             :    the caller's address space and upsert is in [0,1].
     751             : 
     752             :    upsert 0: key will be inserted into bplus.  On success, returns the pair
     753             :    where key was inserted and, on return, *_insert will be 1.  Caller
     754             :    can update all fields in the pair except the key.  Lifetime of the
     755             :    returned pointer is until the next insert / upsert / remove.  Returns
     756             :    NULL if there was no room in the bplus tree or if key was already in
     757             :    the bplus tree (might have moved pairs around in bplus tree on
     758             :    failure) and _insert will be untouched.
     759             : 
     760             :    upsert 1: key will be inserted or updated into bplus.  If key is already
     761             :    present in the bplus tree, returns the location in the caller's
     762             :    address space of the pair with the matching key and, on return,
     763             :    *_insert will be 0.  If not, inserts the key and returns the
     764             :    location in the caller's address space where pair was inserted and,
     765             :    on return, *_insert will be 1.  In both cases, the lifetime of the
     766             :    returned pointer is until the next insert / upsert / remove.  Returns
     767             :    NULL if there was no room in the bplus tree to insert (might have
     768             :    moved pairs around in bplus tree on failure) and _insert will be
     769             :    untouched.
     770             : 
     771             :    The bplus retains no interest in key on return. */
     772             : 
     773             : BPLUS_STATIC BPLUS_PAIR_T *
     774             : BPLUS_(private_insert)( BPLUS_(t)         * join,
     775             :                         BPLUS_KEY_T const * key,
     776             :                         int                 upsert,
     777             :                         int *               _insert );
     778             : 
     779             : /* bplus_private_iter returns the iterator corresponding to query and
     780             :    op.  Assumes join is a current local join, query points to a valid
     781             :    key in the caller's address space or is NULL and op is in [0,3].
     782             :    Returns an iter positioned at:
     783             : 
     784             :      op     | position
     785             :      -------+-------------------------------------------------------------------------------------------------------------------
     786             :      0 (GE) | the first pair with a key greater than or equal to query (or nul if all have keys less than query)
     787             :      1 (GT) | the first pair with a key greater than             query (or nul if all have keys less than or equal to query)
     788             :      2 (LE) | the last  pair with a key less    than or equal to query (or nul if all have keys greater than query)
     789             :      3 (LT) | the last  pair with a key less    than             query (or nul if all have keys greater than or equal to query)
     790             : 
     791             :    If query is NULL, iteration will be positioned as though:
     792             : 
     793             :      op     | query
     794             :      -------+-------
     795             :      0 (GE) | +inf
     796             :      1 (GT) | +inf
     797             :      2 (LE) | -inf
     798             :      3 (LT) | -inf
     799             : 
     800             :    The bplus retains no interest in query on return. */
     801             : 
     802             : FD_FN_PURE BPLUS_STATIC BPLUS_(iter_t)
     803             : BPLUS_(private_iter)( BPLUS_(t)   const * join,
     804             :                       BPLUS_KEY_T const * query,
     805             :                       int                 op );
     806             : 
     807             : FD_PROTOTYPES_END
     808             : 
     809             : /* End internal use only */
     810             : 
     811             : FD_PROTOTYPES_BEGIN
     812             : 
     813             : /* Constructors */
     814             : 
     815             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(leaf_max_est)( ulong ele_max_est );
     816             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(node_max_est)( ulong ele_max_est );
     817             : 
     818             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(align)    ( void );
     819             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(footprint)( ulong node_max, ulong leaf_max );
     820             : 
     821             : BPLUS_STATIC void      * BPLUS_(new)   ( void *      shmem, ulong node_max, ulong leaf_max );
     822             : BPLUS_STATIC BPLUS_(t) * BPLUS_(join)  ( void *      shbplus );
     823             : BPLUS_STATIC void      * BPLUS_(leave) ( BPLUS_(t) * join );
     824             : BPLUS_STATIC void      * BPLUS_(delete)( void *      shbplus );
     825             : 
     826             : /* Accessors */
     827             : 
     828     1873254 : FD_FN_PURE static inline ulong BPLUS_(node_max)( BPLUS_(t) const * join ) { return BPLUS_(private_const)( join )->node_max; }
     829     1873254 : FD_FN_PURE static inline ulong BPLUS_(leaf_max)( BPLUS_(t) const * join ) { return BPLUS_(private_const)( join )->leaf_max; }
     830             : 
     831    16869642 : FD_FN_PURE static inline int BPLUS_(is_empty)( BPLUS_(t) const * join ) { return !BPLUS_(private_const)( join )->root_off; }
     832             : 
     833             : FD_FN_PURE static inline BPLUS_PAIR_T const *
     834     7497702 : BPLUS_(min_const)( BPLUS_(t) const * join ) {
     835     7497702 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     836     7497702 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, bplus->leaf_min_off );
     837     7497702 :   return &leaf->pair[0]; /* first pair of the minimum leaf; no empty check here (leaf_min_off is 0 when empty) */
     838     7497702 : }
     839             : 
     840             : FD_FN_PURE static inline BPLUS_PAIR_T const *
     841     7497702 : BPLUS_(max_const)( BPLUS_(t) const * join ) {
     842     7497702 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     843     7497702 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, bplus->leaf_max_off );
     844     7497702 :   return &leaf->pair[ leaf->pair_cnt-1UL ]; /* last pair of the maximum leaf; no empty check here (leaf_max_off is 0 when empty) */
     845     7497702 : }
     846             : 
     847     3748851 : FD_FN_PURE static inline BPLUS_PAIR_T * BPLUS_(min)( BPLUS_(t) * join ) { return (BPLUS_PAIR_T *)BPLUS_(min_const)( join ); }
     848     3748851 : FD_FN_PURE static inline BPLUS_PAIR_T * BPLUS_(max)( BPLUS_(t) * join ) { return (BPLUS_PAIR_T *)BPLUS_(max_const)( join ); }
     849             : 
     850             : FD_FN_PURE BPLUS_STATIC BPLUS_PAIR_T const * BPLUS_(query_const)( BPLUS_(t) const * join, BPLUS_KEY_T const * query );
     851             : 
     852             : FD_FN_PURE static inline BPLUS_PAIR_T *
     853             : BPLUS_(query)( BPLUS_(t)         * join,
     854    13124622 :                BPLUS_KEY_T const * query ) {
     855    13124622 :   return (BPLUS_PAIR_T *)BPLUS_(query_const)( join, query );
     856    13124622 : }
     857             : 
     858             : /* Operations */
     859             : 
     860             : static inline BPLUS_PAIR_T *
     861             : BPLUS_(insert)( BPLUS_(t) *         join,
     862     3773271 :                 BPLUS_KEY_T const * key ) {
     863     3773271 :   int dummy;
     864     3773271 :   return BPLUS_(private_insert)( join, key, 0, &dummy );
     865     3773271 : }
     866             : 
     867             : static inline BPLUS_PAIR_T *
     868             : BPLUS_(upsert)( BPLUS_(t) *         join,
     869             :                 BPLUS_KEY_T const * key,
     870     3751845 :                 int *               _opt_insert ) {
     871     3751845 :   int dummy;
     872     3751845 :   if( !_opt_insert ) _opt_insert = &dummy; /* compile time */
     873     3751845 :   return BPLUS_(private_insert)( join, key, 1, _opt_insert );
     874     3751845 : }
     875             : 
     876             : BPLUS_STATIC int BPLUS_(remove_key)( BPLUS_(t) * join, BPLUS_KEY_T const * key );
     877             : 
     878     1873158 : static inline void BPLUS_(remove)( BPLUS_(t) * join, BPLUS_PAIR_T * pair ) { BPLUS_(remove_key)( join, &pair->BPLUS_PAIR_KEY ); }
     879             : 
     880             : BPLUS_STATIC void BPLUS_(flush)( BPLUS_(t) * join );
     881             : 
     882             : FD_FN_PURE BPLUS_STATIC int BPLUS_(verify)( BPLUS_(t) const * join );
     883             : 
     884             : /* Iteration */
     885             : /* FIXME: FD_FN_CONST for nul/eq/eq_nul/pair/pair_const?  FD_FN_PURE for
     886             :    min/max/prev/next? */
     887             : 
     888             : static inline BPLUS_(iter_t)
     889     1875756 : BPLUS_(iter_nul)( BPLUS_(t) const * join ) {
     890     1875756 :   (void)join;
     891     1875756 :   BPLUS_(iter_t) iter;
     892     1875756 :   iter.leaf_off = 0UL;
     893     1875756 :   iter.pair_idx = 0UL;
     894     1875756 :   return iter;
     895     1875756 : }
     896             : 
     897             : static inline BPLUS_(iter_t)
     898     1875756 : BPLUS_(iter_min)( BPLUS_(t) const * join ) {
     899     1875756 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     900     1875756 :   ulong leaf_off = bplus->leaf_min_off;
     901     1875756 :   BPLUS_(iter_t) iter;
     902     1875756 :   iter.leaf_off = leaf_off;
     903     1875756 :   iter.pair_idx = 0UL;
     904     1875756 :   return iter;
     905     1875756 : }
     906             : 
     907             : static inline BPLUS_(iter_t)
     908     1875756 : BPLUS_(iter_max)( BPLUS_(t) const * join ) {
     909     1875756 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     910     1875756 :   ulong leaf_off = bplus->leaf_max_off;
     911     1875756 :   BPLUS_(iter_t) iter;
     912     1875756 :   iter.leaf_off = leaf_off;
     913     1875756 :   iter.pair_idx = (FD_UNLIKELY( !leaf_off ) ? 1UL : BPLUS_(private_leaf_const)( bplus, leaf_off )->pair_cnt) - 1UL;
     914     1875756 :   return iter;
     915     1875756 : }
     916             : 
     917             : FD_FN_PURE static inline BPLUS_(iter_t)
     918             : BPLUS_(iter_ge)( BPLUS_(t)   const * join,
     919     3749115 :                  BPLUS_KEY_T const * query ) {
     920     3749115 :   return BPLUS_(private_iter)( join, query, 0 );
     921     3749115 : }
     922             : 
     923             : FD_FN_PURE static inline BPLUS_(iter_t)
     924             : BPLUS_(iter_gt)( BPLUS_(t)   const * join,
     925     3749115 :                  BPLUS_KEY_T const * query ) {
     926     3749115 :   return BPLUS_(private_iter)( join, query, 1 );
     927     3749115 : }
     928             : 
     929             : FD_FN_PURE static inline BPLUS_(iter_t)
     930             : BPLUS_(iter_le)( BPLUS_(t)   const * join,
     931     3749115 :                  BPLUS_KEY_T const * query ) {
     932     3749115 :   return BPLUS_(private_iter)( join, query, 2 );
     933     3749115 : }
     934             : 
     935             : FD_FN_PURE static inline BPLUS_(iter_t)
     936             : BPLUS_(iter_lt)( BPLUS_(t)   const * join,
     937     3749115 :                  BPLUS_KEY_T const * query ) {
     938     3749115 :   return BPLUS_(private_iter)( join, query, 3 );
     939     3749115 : }
     940             : 
     941             : static inline int
     942             : BPLUS_(iter_eq)( BPLUS_(t) const * join,
     943             :                  BPLUS_(iter_t)    iter,
     944    11251554 :                  BPLUS_(iter_t)    fini ) {
     945    11251554 :   (void)join;
     946    11251554 :   return (iter.leaf_off==fini.leaf_off) & (iter.pair_idx==fini.pair_idx);
     947    11251554 : }
     948             : 
     949             : static inline int
     950             : BPLUS_(iter_eq_nul)( BPLUS_(t) const * join,
     951    14998770 :                      BPLUS_(iter_t)    iter ) {
     952    14998770 :   (void)join;
     953    14998770 :   return !iter.leaf_off;
     954    14998770 : }
     955             : 
     956             : static inline BPLUS_(iter_t)
     957             : BPLUS_(iter_next)( BPLUS_(t) const * join,
     958     3751176 :                    BPLUS_(iter_t)    iter ) {
     959     3751176 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     960             : 
     961     3751176 :   ulong leaf_off = iter.leaf_off;
     962     3751176 :   ulong pair_idx = iter.pair_idx;
     963             : 
     964     3751176 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, leaf_off );
     965             : 
     966     3751176 :   pair_idx++;
     967     3751176 :   if( FD_UNLIKELY( pair_idx>=leaf->pair_cnt ) ) { /* optimize for high radix */
     968     3751176 :     leaf_off = leaf->next_off;
     969     3751176 :     pair_idx = 0UL;
     970     3751176 :   }
     971             : 
     972     3751176 :   iter.leaf_off = leaf_off;
     973     3751176 :   iter.pair_idx = pair_idx;
     974     3751176 :   return iter;
     975     3751176 : }
     976             : 
     977             : static inline BPLUS_(iter_t)
     978             : BPLUS_(iter_prev)( BPLUS_(t) const * join,
     979     1875681 :                    BPLUS_(iter_t)    iter ) {
     980     1875681 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     981             : 
     982     1875681 :   ulong leaf_off = iter.leaf_off;
     983     1875681 :   ulong pair_idx = iter.pair_idx;
     984             : 
     985     1875681 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, leaf_off );
     986             : 
     987     1875681 :   if( FD_UNLIKELY( !pair_idx ) ) { /* optimize for high radix */
     988     1875681 :     leaf_off = leaf->prev_off;
     989     1875681 :     pair_idx = FD_UNLIKELY( !leaf_off ) ? 1UL : BPLUS_(private_leaf_const)( bplus, leaf_off )->pair_cnt;
     990     1875681 :   }
     991     1875681 :   pair_idx--;
     992             : 
     993     1875681 :   iter.leaf_off = leaf_off;
     994     1875681 :   iter.pair_idx = pair_idx;
     995     1875681 :   return iter;
     996     1875681 : }
     997             : 
     998             : static inline BPLUS_PAIR_T const *
     999             : BPLUS_(iter_pair_const)( BPLUS_(t) const * join,
    1000    11243061 :                          BPLUS_(iter_t)    iter ) {
    1001    11243061 :   return BPLUS_(private_leaf_const)( BPLUS_(private_const)( join ), iter.leaf_off )->pair + iter.pair_idx;
    1002    11243061 : }
    1003             : 
    1004             : static inline BPLUS_PAIR_T *
    1005             : BPLUS_(iter_pair)( BPLUS_(t) *    join,
    1006     3751362 :                    BPLUS_(iter_t) iter ) {
    1007     3751362 :   return BPLUS_(private_leaf)( BPLUS_(private)( join ), iter.leaf_off )->pair + iter.pair_idx;
    1008     3751362 : }
    1009             : 
    1010             : FD_PROTOTYPES_END
    1011             : 
    1012             : #endif
    1013             : 
    1014             : #if BPLUS_IMPL_STYLE==0 || BPLUS_IMPL_STYLE==2
    1015             : 
    1016             : /* Implementation *****************************************************/
    1017             : 
    1018             : /* bplus_private_node_query returns the index of a node's child tree,
    1019             :    in [0,tree_cnt), that might contain query.
    1020             : 
    1021             :      tree 0          covers keys [ -inf,              pivot[0] )
    1022             :           i          covers keys [ pivot[i-1],        pivot[i] )
    1023             :           tree_cnt-1 covers keys [ pivot[tree_cnt-2], +inf     )
    1024             : 
    1025             :    Assumes pivot contains unique keys in ascending order, tree_cnt is in
    1026             :    [2,tree_max], tree_max <<< ULONG_MAX and query is valid. */
                       /* This is a binary search over the tree_cnt-1 pivots.  A query that
                          exactly equals pivot[i-1] yields i, consistent with tree i's range
                          being inclusive of its lower pivot. */
    1027             : 
    1028             : FD_FN_PURE static ulong
    1029             : BPLUS_(private_node_query)( BPLUS_KEY_T const * FD_RESTRICT pivot,
    1030             :                             ulong                           tree_cnt,
    1031   232074357 :                             BPLUS_KEY_T const * FD_RESTRICT query ) {
    1032   232074357 :   ulong i0 = 0UL;      /* first candidate tree (inclusive) */
    1033   232074357 :   ulong i1 = tree_cnt; /* one past the last candidate tree */
    1034             : 
    1035   431911257 :   do {
    1036             : 
    1037             :     /* At this point, query might be found in trees in [i0,i1) and this
    1038             :        range contains at least two trees.  Test the middle tree.  If it
    1039             :        matches exactly, we are done.  Otherwise, recurse on the
    1040             :        appropriate half of the range. */
    1041             : 
    1042   431911257 :     ulong im = (i0+i1) >> 1; /* No overflow, at least 1 */
    1043             : 
                           /* pivot[im-1] is the lower bound of tree im */
    1044   431911257 :     int cmp = BPLUS_(private_key_cmp)( query, &pivot[im-1UL] );
    1045   431911257 :     if( FD_UNLIKELY( !cmp ) ) return im; /* (optional) early abort, optimize for big trees */
    1046   426659160 :     i0 = fd_ulong_if( cmp<0, i0, im ); /* query at or after tree im's lower bound: keep [im,i1) */
    1047   426659160 :     i1 = fd_ulong_if( cmp<0, im, i1 ); /* query before tree im's lower bound:      keep [i0,im) */
    1048             : 
    1049   426659160 :   } while( FD_LIKELY( (i1-i0)>1UL) ); /* optimize for big trees */
    1050             : 
                         /* The candidate range has narrowed to the single tree i0 */
    1051   226822260 :   return i0;
    1052   232074357 : }
    1053             : 
    1054             : /* bplus_private_pair_query returns the index of a leaf's pair, in
    1055             :    [0,pair_cnt), that exactly matches query or pair_cnt if there is no
    1056             :    matching pair.  Assumes pair keys are unique and ascending sorted,
    1057             :    pair_cnt is in [1,pair_max], pair_max <<< ULONG_MAX and query is
    1058             :    valid. */
    1059             : 
    1060             : FD_FN_PURE static ulong
    1061             : BPLUS_(private_pair_query)( BPLUS_PAIR_T const * FD_RESTRICT pair,
    1062             :                             ulong                            pair_cnt,
    1063    20654814 :                             BPLUS_KEY_T  const * FD_RESTRICT query ) {
    1064    20654814 :   ulong i0 = 0UL;      /* first candidate pair (inclusive) */
    1065    20654814 :   ulong i1 = pair_cnt; /* one past the last candidate pair */
    1066             : 
    1067    33977313 :   do {
    1068             : 
    1069             :     /* At this point, query might match one of the pairs in [i0,i1) and
    1070             :        this range is not empty.  Test the pair in the middle.  If it
    1071             :        matches, we found the pair.  Otherwise, recurse on the
    1072             :        appropriate half of the range (exclusive of the pair just tested). */
    1073             : 
    1074    33977313 :     ulong im = (i0+i1) >> 1; /* No overflow */
    1075             : 
    1076    33977313 :     int cmp = BPLUS_(private_key_cmp)( query, &pair[im].BPLUS_PAIR_KEY );
    1077    33977313 :     if( FD_UNLIKELY( !cmp ) ) return im; /* Found, optimize for big trees */
    1078    20820888 :     i0 = fd_ulong_if( cmp<0, i0, im+1UL ); /* query after  pair im: keep [im+1,i1) */
    1079    20820888 :     i1 = fd_ulong_if( cmp<0, im, i1     ); /* query before pair im: keep [i0,im)   */
    1080             : 
    1081    20820888 :   } while( FD_LIKELY( i1-i0 ) ); /* optimize for big trees */
    1082             : 
    1083     7498389 :   return pair_cnt; /* not found */
    1084    20654814 : }
    1085             : 
    1086             : /* bplus_private_child_insert inserts a child at position child_idx into
    1087             :    parent.  Parent should have a tree_cnt in [1,tree_max-1] and
    1088             :    child_idx should be in [1,tree_cnt] (such that the child is never
    1089             :    inserted into a parent with no children or a parent with the maximum
    1090             :    number of children and is never inserted as the first born child).
    1091             :    child_off is the bplus global offset of the child.  This can be a
    1092             :    node or leaf but it should match parent's current children.
    1093             :    child_pivot is the pivot value associated with the child and the
    1094             :    child_idx should preserve the parent's pivot sorting.  Further, child
    1095             :    should not contain any keys that are outside the parent's pivot range
    1096             :    after the insert. */
    1097             : 
    1098             : static void
    1099             : BPLUS_(private_child_insert)( BPLUS_(private_node_t) * FD_RESTRICT parent,
    1100             :                               ulong                                child_idx,
    1101             :                               ulong                                child_off,
    1102     1198194 :                               BPLUS_KEY_T const      * FD_RESTRICT child_pivot ) {
    1103     1198194 :   ulong                     tree_cnt = parent->tree_cnt;
    1104     1198194 :   ulong       * FD_RESTRICT tree_off = parent->tree_off;
    1105     1198194 :   BPLUS_KEY_T * FD_RESTRICT pivot    = parent->pivot;
    1106             : 
    1107             :   /* Make room for child at child_idx by shifting children currently at
    1108             :      or after child_idx up one. */
    1109             : 
                         /* Iterate from the top down so no entry is overwritten before it is
                            moved.  child_idx>=1 implies sibling_idx>=2 in the loop body such
                            that pivot[sibling_idx-2UL] does not underflow.  Child i's pivot
                            is stored at pivot[i-1]. */
    1110     2885352 :   for( ulong sibling_idx=tree_cnt; sibling_idx>child_idx; sibling_idx-- ) {
    1111     1687158 :     tree_off[sibling_idx    ] = tree_off[sibling_idx-1UL];
    1112     1687158 :     pivot   [sibling_idx-1UL] = pivot   [sibling_idx-2UL];
    1113     1687158 :   }
    1114             : 
    1115             :   /* Insert the child at child_idx */
    1116             : 
    1117     1198194 :   tree_off[child_idx    ] = child_off;
    1118     1198194 :   pivot   [child_idx-1UL] = child_pivot[0]; /* copies the key by value */
    1119             : 
    1120     1198194 :   parent->tree_cnt = tree_cnt + 1UL; /* In [2,tree_max] */
    1121     1198194 : }
    1122             : 
    1123             : /* bplus_private_child_remove removes the child child_idx from the bplus
    1124             :    node parent.  Assumes parent is valid with a tree cnt in [2,tree_max]
    1125             :    and that child is in [1,tree_cnt) (as such, this will never remove
    1126             :    the first born child). */
    1127             : 
    1128             : static void
    1129             : BPLUS_(private_child_remove)( BPLUS_(private_node_t) * FD_RESTRICT parent,
    1130     1193622 :                               ulong                                child_idx ) {
    1131     1193622 :   ulong                     tree_cnt = parent->tree_cnt;
    1132     1193622 :   ulong       * FD_RESTRICT tree_off = parent->tree_off;
    1133     1193622 :   BPLUS_KEY_T * FD_RESTRICT pivot    = parent->pivot;
    1134             : 
    1135             :   /* Fill the hole at child_idx by shifting children currently at or
    1136             :      after child_idx down one. */
    1137             : 
                         /* tree_cnt is decremented first so that it is both the loop bound
                            and the final child count.  If the removed child was the last
                            child, the loop body does not execute.  The removed child's pivot
                            (pivot[child_idx-1]) is overwritten or dropped with it. */
    1138     1193622 :   tree_cnt--;
    1139     2689209 :   for( ulong sibling_idx=child_idx; sibling_idx<tree_cnt; sibling_idx++ ) {
    1140     1495587 :     tree_off[sibling_idx    ] = tree_off[sibling_idx+1UL];
    1141     1495587 :     pivot   [sibling_idx-1UL] = pivot   [sibling_idx    ];
    1142     1495587 :   }
    1143             : 
    1144     1193622 :   parent->tree_cnt = tree_cnt; /* In [1,tree_max-1] */
    1145     1193622 : }
    1146             : 
                       /* bplus_leaf_max_est returns the number of leaves estimated to be
                          needed to hold ele_max_est elements:  0 if ele_max_est is 0, 1 if
                          ele_max_est fits in a single leaf (<=pair_max) and
                          1 + (ele_max_est-1)/pair_min otherwise (see derivation below). */
    1147             : ulong
    1148          18 : BPLUS_(leaf_max_est)( ulong ele_max_est ) {
    1149             : 
    1150             :   /* No leaves needed for always empty trees */
    1151             : 
    1152          18 :   if( FD_UNLIKELY( !ele_max_est ) ) return 0UL;
    1153             : 
    1154             :   /* Trivial bplus trees have just a root leaf */
    1155             : 
    1156          12 :   if( FD_UNLIKELY( ele_max_est<=BPLUS_(private_pair_max)() ) ) return 1UL;
    1157             : 
    1158             :   /* In a non-trivial bplus tree, each leaf has at least
    1159             :      pair_min==pair_max/2 elements.  So, we require:
    1160             : 
    1161             :           leaf_max*pair_min >= ele_max_est
    1162             :        -> leaf_max >= ele_max_est / pair_min
    1163             : 
    1164             :      The smallest leaf_max that satisfies this is:
    1165             : 
    1166             :           ceil( ele_max_est / pair_min )
    1167             :        -> floor( (ele_max_est + pair_min - 1) / pair_min )
    1168             :        -> 1 + floor( (ele_max_est - 1) / pair_min ) */
    1169             : 
    1170           6 :   return 1UL + ((ele_max_est-1UL) / BPLUS_(private_pair_min)()); /* No overflow */
    1171          12 : }
    1172             : 
                       /* bplus_node_max_est returns the number of interior nodes estimated
                          to be needed to hold ele_max_est elements.  It starts from
                          bplus_leaf_max_est( ele_max_est ) trees at the leaf layer and sums
                          the node count of each layer built above it until a single tree
                          remains.  Returns 0 when at most one leaf is needed. */
    1173             : ulong
    1174           9 : BPLUS_(node_max_est)( ulong ele_max_est ) {
    1175             : 
    1176             :   /* Start at the leaf layer with leaf_max trees */
    1177             : 
    1178           9 :   ulong node_max = 0UL;
    1179           9 :   ulong tree_cnt = BPLUS_(leaf_max_est)( ele_max_est );
    1180             : 
    1181          30 :   while( tree_cnt>1UL ) {
    1182             : 
    1183             :     /* At this point, we have more than one tree in the current layer.
    1184             :        To reduce the number of trees, we create a new layer of nodes
    1185             :        above it and make each new node responsible for up to
    1186             :        tree_min==tree_max/2 of the trees in the current layer to give a
    1187             :        reasonably tight bound to the worst case.  That implies this new
    1188             :        layer will need at most:
    1189             : 
    1190             :             ceil( tree_cnt / tree_min )
    1191             :          -> floor( (tree_cnt + tree_min - 1) / tree_min )
    1192             :          -> 1 + floor( (tree_cnt - 1) / tree_min )
    1193             : 
    1194             :        nodes and this layer will reduce the number of trees to the
    1195             :        same number. */
    1196             : 
    1197          21 :     tree_cnt = 1UL + ((tree_cnt-1UL) / BPLUS_(private_tree_min)()); /* No overflow */
    1198          21 :     node_max += tree_cnt; /* accumulate this layer's nodes */
    1199             : 
    1200          21 :   }
    1201             : 
    1202           9 :   return node_max;
    1203           9 : }
    1204             : 
                       /* bplus_align returns BPLUS_ALIGN, the alignment that bplus_new and
                          bplus_join check their memory region against. */
    1205             : ulong
    1206           0 : BPLUS_(align)( void ) {
    1207           0 :   return BPLUS_ALIGN;
    1208           0 : }
    1209             : 
                       /* bplus_footprint returns the number of bytes needed for a bplus
                          with room for node_max nodes and leaf_max leaves, or 0 if node_max
                          or leaf_max exceeds its private_{node,leaf}_max_max limit.  The
                          layout is the private_t header, then the node array aligned to
                          BPLUS_NODE_ALIGN, then the leaf array aligned to BPLUS_LEAF_ALIGN,
                          with the total rounded up to a multiple of BPLUS_ALIGN.  This
                          calculation is repeated in bplus_new and the two should be kept in
                          sync. */
    1210             : ulong
    1211             : BPLUS_(footprint)( ulong node_max,
    1212          18 :                    ulong leaf_max ) {
    1213             : 
    1214          18 :   if( FD_UNLIKELY( (node_max > BPLUS_(private_node_max_max)()) | (leaf_max > BPLUS_(private_leaf_max_max)()) ) ) return 0UL;
    1215             : 
    1216             :   /* At this point, the needed node and leaf storage is at most 2^63,
    1217             :      which is impractically large but also with plenty of room left over
    1218             :      for the metadata and remaining alignment padding. */
    1219             : 
    1220           6 :   ulong off = 0UL;                                  /**/                     off +=          sizeof( BPLUS_(private_t)      );
    1221           6 :   off = fd_ulong_align_up( off, BPLUS_NODE_ALIGN ); /*ulong node_lo = off;*/ off += node_max*sizeof( BPLUS_(private_node_t) );
    1222           6 :   off = fd_ulong_align_up( off, BPLUS_LEAF_ALIGN ); /*ulong leaf_lo = off;*/ off += leaf_max*sizeof( BPLUS_(private_leaf_t) );
    1223           6 :   off = fd_ulong_align_up( off, BPLUS_ALIGN );
    1224             : 
    1225           6 :   return off;
    1226          18 : }
    1227             : 
                       /* bplus_flush resets bplus to empty:  the node and leaf pool heads,
                          the root and the min / max leaf offsets are zeroed and then every
                          one of the node_max nodes and leaf_max leaves is released back to
                          its pool.  Assumes node_max, leaf_max, node_lo and leaf_lo have
                          already been set (bplus_new sets them before calling this).  The
                          contents of the nodes and leaves are not otherwise cleared here. */
    1228             : void
    1229           6 : BPLUS_(flush)( BPLUS_(t) * bplus ) {
    1230           6 :   bplus->node_pool_off = 0UL;
    1231           6 :   bplus->leaf_pool_off = 0UL;
    1232           6 :   bplus->root_off      = 0UL;
    1233           6 :   bplus->leaf_min_off  = 0UL;
    1234           6 :   bplus->leaf_max_off  = 0UL;
    1235             : 
                         /* Release in reverse index order (NOTE(review): presumably so the
                            pools hand out the lowest indexed node / leaf first; confirm
                            against private_node_release / private_leaf_release). */
    1236           6 :   BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, bplus->node_lo );
    1237        6162 :   for( ulong node_rem=bplus->node_max; node_rem; node_rem-- ) BPLUS_(private_node_release)( bplus, &node[ node_rem-1UL ] );
    1238             : 
    1239           6 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, bplus->leaf_lo );
    1240       12294 :   for( ulong leaf_rem=bplus->leaf_max; leaf_rem; leaf_rem-- ) BPLUS_(private_leaf_release)( bplus, &leaf[ leaf_rem-1UL ] );
    1241           6 : }
    1242             : 
                       /* bplus_new formats the memory region pointed to by shmem as an
                          empty bplus with room for node_max nodes and leaf_max leaves.
                          Returns shmem on success and NULL on failure (logs a warning).
                          Reasons for failure are a NULL shmem, a shmem not aligned to
                          BPLUS_ALIGN and a node_max / leaf_max pair for which
                          bplus_footprint returns 0.  The region is not cleared (see note
                          below). */
    1243             : void *
    1244             : BPLUS_(new)( void * shmem,
    1245             :              ulong  node_max,
    1246          15 :              ulong  leaf_max ) {
    1247          15 :   BPLUS_(private_t) * bplus = (BPLUS_(private_t) *)shmem;
    1248             : 
    1249          15 :   if( FD_UNLIKELY( !bplus ) ) {
    1250           3 :     FD_LOG_WARNING(( "NULL shmem" ));
    1251           3 :     return NULL;
    1252           3 :   }
    1253             : 
    1254          12 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) ) ) {
    1255           3 :     FD_LOG_WARNING(( "misaligned shmem" ));
    1256           3 :     return NULL;
    1257           3 :   }
    1258             : 
                         /* footprint is only used to validate node_max and leaf_max */
    1259           9 :   ulong footprint = BPLUS_(footprint)( node_max, leaf_max );
    1260           9 :   if( FD_UNLIKELY( !footprint ) ) {
    1261           6 :     FD_LOG_WARNING(( "bad node_max and/or leaf_max" ));
    1262           6 :     return NULL;
    1263           6 :   }
    1264             : 
    1265             :   /* Note: it is the caller's responsibility to clear the memory because
    1266             :      it is potentially very big and very time consuming to do so and may
    1267             :      already have been cleared (e.g. mmap from the OS) */
    1268             : 
                         /* Recompute the layout exactly as bplus_footprint does, this time
                            capturing the offsets of the node and leaf arrays.  The final
                            value of off is not used. */
    1269           3 :   ulong off;
    1270           3 :   off = 0UL;                                        /**/                 off +=          sizeof( BPLUS_(private_t)      );
    1271           3 :   off = fd_ulong_align_up( off, BPLUS_NODE_ALIGN ); ulong node_lo = off; off += node_max*sizeof( BPLUS_(private_node_t) );
    1272           3 :   off = fd_ulong_align_up( off, BPLUS_LEAF_ALIGN ); ulong leaf_lo = off; off += leaf_max*sizeof( BPLUS_(private_leaf_t) );
    1273           3 :   off = fd_ulong_align_up( off, BPLUS_ALIGN );
    1274             : 
    1275           3 :   bplus->node_max      = node_max; bplus->leaf_max      = leaf_max;
    1276           3 :   bplus->node_lo       = node_lo;  bplus->leaf_lo       = leaf_lo;
    1277             : 
                         /* flush reads the four fields set above to build the pools */
    1278           3 :   BPLUS_(flush)( bplus );
    1279             : 
                         /* magic is written last, bracketed by FD_COMPILER_MFENCE, such that
                            the region is only marked as a bplus after everything else has
                            been initialized. */
    1280           3 :   FD_COMPILER_MFENCE();
    1281           3 :   bplus->magic = BPLUS_MAGIC;
    1282           3 :   FD_COMPILER_MFENCE();
    1283             : 
    1284           3 :   return shmem;
    1285           9 : }
    1286             : 
                       /* bplus_join joins the caller to the bplus in the memory region
                          pointed to by shbplus.  Returns a handle to the join on success
                          (this is just shbplus cast to a bplus_t pointer) and NULL on
                          failure (logs a warning).  Reasons for failure are a NULL shbplus,
                          a shbplus not aligned to BPLUS_ALIGN and a region whose magic is
                          not BPLUS_MAGIC. */
    1287             : BPLUS_(t) *
    1288          12 : BPLUS_(join)( void * shbplus ) {
    1289          12 :   BPLUS_(private_t) * bplus = (BPLUS_(private_t) *)shbplus;
    1290             : 
    1291          12 :   if( FD_UNLIKELY( !bplus ) ) {
    1292           3 :     FD_LOG_WARNING(( "NULL shbplus" ));
    1293           3 :     return NULL;
    1294           3 :   }
    1295             : 
    1296           9 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) ) ) {
    1297           3 :     FD_LOG_WARNING(( "misaligned shbplus" ));
    1298           3 :     return NULL;
    1299           3 :   }
    1300             : 
    1301           6 :   if( FD_UNLIKELY( bplus->magic!=BPLUS_MAGIC ) ) {
    1302           3 :     FD_LOG_WARNING(( "bad magic" ));
    1303           3 :     return NULL;
    1304           3 :   }
    1305             : 
    1306           3 :   return (BPLUS_(t) *)bplus;
    1307           6 : }
    1308             : 
                       /* bplus_leave leaves a join.  Returns join cast to a void pointer on
                          success and NULL if join is NULL (logs a warning).  Nothing in the
                          bplus is modified. */
    1309             : void *
    1310           6 : BPLUS_(leave)( BPLUS_(t) * join ) {
    1311           6 :   if( FD_UNLIKELY( !join ) ) {
    1312           3 :     FD_LOG_WARNING(( "NULL join" ));
    1313           3 :     return NULL;
    1314           3 :   }
    1315             : 
    1316           3 :   return (void *)join;
    1317           6 : }
    1318             : 
                       /* bplus_delete unformats the memory region pointed to by shbplus by
                          zeroing its magic.  Returns shbplus on success and NULL on failure
                          (logs a warning).  Reasons for failure are a NULL shbplus, a
                          shbplus not aligned to BPLUS_ALIGN and a region whose magic is not
                          BPLUS_MAGIC.  No other part of the region is modified. */
    1319             : void *
    1320          12 : BPLUS_(delete)( void * shbplus ) {
    1321          12 :   BPLUS_(private_t) * bplus = (BPLUS_(private_t) *)shbplus;
    1322             : 
    1323          12 :   if( FD_UNLIKELY( !bplus ) ) {
    1324           3 :     FD_LOG_WARNING(( "NULL shbplus" ));
    1325           3 :     return NULL;
    1326           3 :   }
    1327             : 
    1328           9 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) ) ) {
    1329           3 :     FD_LOG_WARNING(( "misaligned shbplus" ));
    1330           3 :     return NULL;
    1331           3 :   }
    1332             : 
    1333           6 :   if( FD_UNLIKELY( bplus->magic!=BPLUS_MAGIC ) ) {
    1334           3 :     FD_LOG_WARNING(( "bad magic" ));
    1335           3 :     return NULL;
    1336           3 :   }
    1337             : 
                         /* Clear the magic so that subsequent join / delete attempts on this
                            region fail the magic check above. */
    1338           3 :   FD_COMPILER_MFENCE();
    1339           3 :   bplus->magic = 0UL;
    1340           3 :   FD_COMPILER_MFENCE();
    1341             : 
    1342           3 :   return (void *)bplus;
    1343           6 : }
    1344             : 
                       /* bplus_query_const returns a pointer to the pair whose key matches
                          query or NULL if there is no such pair (including when the bplus
                          is empty).  It descends from the root through interior nodes via
                          bplus_private_node_query until it reaches a leaf and then searches
                          that leaf with bplus_private_pair_query. */
    1345             : BPLUS_PAIR_T const *
    1346             : BPLUS_(query_const)( BPLUS_(t)   const * join,
    1347    15014253 :                      BPLUS_KEY_T const * query ) {
    1348    15014253 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
    1349             : 
    1350             :   /* If an empty bplus tree, not found */
    1351             : 
    1352    15014253 :   ulong tree_off = bplus->root_off;
    1353    15014253 :   if( FD_UNLIKELY( !tree_off ) ) return NULL; /* optimize for big trees */
    1354             : 
    1355             :   /* At this point, the bplus tree is not empty.  Find the leaf that
    1356             :      might contain query. */
    1357             : 
    1358    15013974 :   ulong leaf_lo = bplus->leaf_lo;
    1359    95716287 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* optimize for big trees */
    1360    80702313 :     BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    1361    80702313 :     tree_off = node->tree_off[ BPLUS_(private_node_query)( node->pivot, node->tree_cnt, query ) ];
    1362    80702313 :   }
    1363    15013974 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, tree_off );
    1364             : 
    1365             :   /* At this point, leaf might contain query.  Query the leaf */
    1366             : 
                         /* Use the template's BPLUS_PAIR_T here (not a concrete type name)
                            so this compiles for any pair type the template is instantiated
                            with. */
    1367    15013974 :   BPLUS_PAIR_T const * pair     = leaf->pair;
    1368    15013974 :   ulong                pair_cnt = leaf->pair_cnt;
    1369    15013974 :   ulong                pair_idx = BPLUS_(private_pair_query)( pair, pair_cnt, query );
    1370             : 
                         /* private_pair_query returns pair_cnt when there is no match */
    1371    15013974 :   return fd_ptr_if( pair_idx<pair_cnt, &pair[ pair_idx ], NULL );
    1372    15014253 : }
    1373             : 
    1374             : BPLUS_PAIR_T *
    1375             : BPLUS_(private_insert)( BPLUS_(t) *         join,
    1376             :                         BPLUS_KEY_T const * key,
    1377             :                         int                 upsert,
    1378     7525116 :                         int *               _insert ) {
    1379     7525116 :   BPLUS_(private_t) * bplus = BPLUS_(private)( join );
    1380             : 
    1381             :   /* If the bplus tree is empty, create the root leaf and insert the key
    1382             :      into it */
    1383             : 
    1384     7525116 :   ulong tree_off = bplus->root_off;
    1385     7525116 :   if( FD_UNLIKELY( !tree_off ) ) { /* Empty bplus, optimize for big */
    1386             : 
    1387         189 :     BPLUS_(private_leaf_t) * root = BPLUS_(private_leaf_acquire)( bplus );
    1388         189 :     if( FD_UNLIKELY( !root ) ) return NULL; /* no room for insert */
    1389             : 
    1390         189 :     root->prev_off               = 0UL;
    1391         189 :     root->next_off               = 0UL;
    1392         189 :     root->pair_cnt               = 1UL;
    1393         189 :     root->pair[0].BPLUS_PAIR_KEY = key[0];
    1394         189 :     ulong root_off = BPLUS_(private_off)( bplus, root );
    1395         189 :     bplus->root_off     = root_off;
    1396         189 :     bplus->leaf_min_off = root_off;
    1397         189 :     bplus->leaf_max_off = root_off;
    1398             : 
    1399         189 :     *_insert = 1;
    1400         189 :     return &root->pair[0];
    1401             : 
    1402         189 :   }
    1403             : 
    1404             :   /* At this point, the bplus tree is not empty.  We recurse through
    1405             :      interior nodes to find the leaf that should hold key, splitting
    1406             :      interior nodes as we go. */
    1407             : 
    1408     7524927 :   ulong tree_min = BPLUS_(private_tree_min)();
    1409     7524927 :   ulong tree_max = BPLUS_(private_tree_max)(); /* ==tree_min*2 */
    1410             : 
    1411     7524927 :   BPLUS_(private_node_t) * parent    = NULL;
    1412     7524927 :   ulong                    child_idx = 0UL;
    1413             : 
    1414     7524927 :   ulong leaf_lo = bplus->leaf_lo;
    1415    47962443 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* Optimize for big trees */
    1416    40437516 :     BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, tree_off );
    1417             : 
    1418             :     /* At this point, we should insert key into one of the node's trees
    1419             :        and tree_cnt is in [2,tree_max] (root) or [tree_min,tree_max]
    1420             :        (non-root).  If the node has a parent, parent and all node's
    1421             :        siblings are nodes and parent has in [2,tree_max-1] (root parent)
    1422             :        or [tree_min,tree_max-1] (non-root parent) children.  (tree_max-1
    1423             :        because if it had tree_max children when insert started, we would
    1424             :        have split it on the previous iteration).
    1425             : 
    1426             :        If the node is full, split it. */
    1427             : 
    1428    40437516 :     ulong tree_cnt = node->tree_cnt;
    1429    40437516 :     if( FD_UNLIKELY( tree_cnt==tree_max ) ) { /* Optimize for high radix */
    1430             : 
    1431             :       /* Acquire resources.  If node is the root, this includes making a
    1432             :          new root node and making the new root node's parent. */
    1433             : 
    1434      231162 :       BPLUS_(private_node_t) * new_node = BPLUS_(private_node_acquire)( bplus );
    1435      231162 :       if( FD_UNLIKELY( !new_node ) ) return NULL; /* No room for insert */
    1436             : 
    1437      231162 :       if( FD_UNLIKELY( !parent ) ) {
    1438         714 :         parent = BPLUS_(private_node_acquire)( bplus );
    1439         714 :         if( FD_UNLIKELY( !parent ) ) {
    1440           0 :           BPLUS_(private_node_release)( bplus, new_node );
    1441           0 :           return NULL; /* No room for insert */
    1442           0 :         }
    1443             : 
    1444         714 :         bplus->root_off = BPLUS_(private_off)( bplus, parent );
    1445             : 
    1446         714 :         parent->tree_cnt    = 1UL; /* Will be incremented to 2 by the child_insert below. */
    1447         714 :         parent->tree_off[0] = BPLUS_(private_off)( bplus, node );
    1448             : 
    1449         714 :         child_idx = 0UL;
    1450         714 :       }
    1451             : 
    1452             :       /* At this point, node is child child_idx of parent and we need to
    1453             :          split node.  Further, new_node is the node that will be created
    1454             :          by the split and parent has room to insert a link to new_node.
    1455             :          Split node evenly into new_node and update the parent
    1456             :          accordingly. */
    1457             : 
    1458      231162 :       BPLUS_KEY_T const * median = &node->pivot[ tree_min-1UL ];
    1459             : 
    1460      231162 :       node->tree_cnt = tree_min;
    1461             : 
    1462      231162 :       new_node->tree_cnt = tree_min;
    1463      231162 :       memcpy( new_node->tree_off, node->tree_off + tree_min, sizeof(ulong)      * tree_min      );
    1464      231162 :       memcpy( new_node->pivot,    node->pivot    + tree_min, sizeof(BPLUS_KEY_T)*(tree_min-1UL) );
    1465             : 
    1466      231162 :       BPLUS_(private_child_insert)( parent, child_idx+1UL, BPLUS_(private_off)( bplus, new_node ), median );
    1467             : 
    1468             :       /* Move into the appropriate split */
    1469             : 
    1470      231162 :       node     = fd_ptr_if( BPLUS_(private_key_cmp)( key, median )<0, node, new_node );
    1471      231162 :       tree_cnt = tree_min;
    1472      231162 :     }
    1473             : 
    1474             :     /* At this point, we should insert key into one of the node's trees
    1475             :        and tree_cnt is in [2,tree_max-1] (root) or [tree_min,tree_max-1]
    1476             :        (non root) such that we are guaranteed to be able to insert. */
    1477             : 
    1478    40437516 :     parent    = node;
    1479    40437516 :     child_idx = BPLUS_(private_node_query)( node->pivot, tree_cnt, key );
    1480    40437516 :     tree_off  = node->tree_off[ child_idx ];
    1481    40437516 :   }
    1482             : 
    1483     7524927 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, tree_off );
    1484             : 
    1485             :   /* At this point, we'd like to insert key into leaf.  But if leaf is
    1486             :      full, we split it to make room. */
    1487             : 
    1488     7524927 :   ulong pair_min = BPLUS_(private_pair_min)();
    1489     7524927 :   ulong pair_max = BPLUS_(private_pair_max)(); /* ==pair_min*2 */
    1490             : 
    1491     7524927 :   ulong pair_cnt = (ulong)leaf->pair_cnt;
    1492     7524927 :   if( FD_UNLIKELY( pair_cnt==pair_max ) ) { /* optimize for high radix */
    1493             : 
    1494             :     /* Acquire resources.  If leaf is the root, this includes making a
    1495             :        new root node and making the new root node's parent. */
    1496             : 
    1497      967035 :     BPLUS_(private_leaf_t) * new_leaf = BPLUS_(private_leaf_acquire)( bplus );
    1498      967035 :     if( FD_UNLIKELY( !new_leaf ) ) return NULL; /* No room for insert */
    1499             : 
    1500      967032 :     if( FD_UNLIKELY( !parent ) ) {
    1501         159 :       parent = BPLUS_(private_node_acquire)( bplus );
    1502         159 :       if( FD_UNLIKELY( !parent ) ) {
    1503           0 :         BPLUS_(private_leaf_release)( bplus, new_leaf );
    1504           0 :         return NULL; /* No room to insert */
    1505           0 :       }
    1506             : 
    1507         159 :       bplus->root_off = BPLUS_(private_off)( bplus, parent );
    1508             : 
    1509         159 :       parent->tree_cnt    = 1UL; /* Will be incremented to 2 below */
    1510         159 :       parent->tree_off[0] = BPLUS_(private_off)( bplus, leaf );
    1511             : 
    1512         159 :       child_idx = 0UL;
    1513         159 :     }
    1514             : 
    1515             :     /* At this point, leaf is child child_idx of parent and we need to
    1516             :        split leaf.  Further, new_leaf is the leaf that will be created
    1517             :        by the split and parent has room to insert a link to new_leaf.
    1518             :        Split leaf evenly into new_leaf and update the parent
    1519             :        accordingly.  Splitting this leaf might make a new max leaf (it
    1520             :        will never make a new min leaf). */
    1521             : 
    1522      967032 :     BPLUS_KEY_T const * median = &leaf->pair[ pair_min ].BPLUS_PAIR_KEY;
    1523             : 
    1524      967032 :     ulong next_off = leaf->next_off;
    1525             : 
    1526      967032 :     leaf->pair_cnt = pair_min;
    1527      967032 :     leaf->next_off = BPLUS_(private_off)( bplus, new_leaf );
    1528             : 
    1529      967032 :     new_leaf->pair_cnt = pair_min;
    1530      967032 :     new_leaf->prev_off = BPLUS_(private_off)( bplus, leaf );
    1531      967032 :     new_leaf->next_off = next_off;
    1532      967032 :     memcpy( &new_leaf->pair[0], &leaf->pair[pair_min], sizeof(pair_t)*pair_min );
    1533             : 
    1534             :     /* FIXME: BRANCHLESS? */
    1535      967032 :     ulong new_leaf_off = BPLUS_(private_off)( bplus, new_leaf );
    1536      967032 :     if( FD_UNLIKELY( !next_off ) ) bplus->leaf_max_off                               = new_leaf_off;
    1537      963618 :     else                           BPLUS_(private_leaf)( bplus, next_off )->prev_off = new_leaf_off;
    1538             : 
    1539      967032 :     BPLUS_(private_child_insert)( parent, child_idx+1UL, new_leaf_off, median );
    1540             : 
    1541             :     /* Move into the appropriate split */
    1542             : 
    1543      967032 :     leaf     = (BPLUS_(private_key_cmp)( key, median )<0) ? leaf : new_leaf;
    1544      967032 :     pair_cnt = pair_min;
    1545      967032 :   }
    1546             : 
    1547             :   /* At this point, leaf either contains key or is where we should
    1548             :      insert key.  Further, pair_cnt is in [1,pair_max-1] (root) or
    1549             :      [pair_min,pair_max-1] (non root).  Search for key in the leaf.  If
    1550             :      key is not in the leaf, the search will reveal where to put the
    1551             :      key. */
    1552             : 
    1553     7524924 :   BPLUS_PAIR_T * pair = leaf->pair;
    1554     7524924 :   ulong          i0   = 0UL;
    1555     7524924 :   ulong          i1   = pair_cnt;
    1556    12506436 :   do {
    1557             : 
    1558             :     /* At this point, pairs in [0,i0) are before key, pairs in
    1559             :        [i1,pair_cnt) are after key and pairs in [i0,i1) (non-empty) are
    1560             :        not known.  Probe the middle of this range for key. */
    1561             : 
    1562    12506436 :     ulong im = (i0+i1) >> 1; /* no overflow */
    1563             : 
    1564    12506436 :     int cmp = BPLUS_(private_key_cmp)( &pair[ im ].BPLUS_PAIR_KEY, key );
    1565             : 
    1566             :     /* If cmp==0, pair im holds the key and we are done.  Otherwise, if
    1567             :        cmp<0 / cmp>0, pair im is before / after key.  We adjust the
    1568             :        ranges appropriately and recurse. */
    1569             : 
    1570    12506436 :     if( FD_UNLIKELY( !cmp ) ) { /* optimize for big trees */
    1571     3752997 :       leaf->pair_cnt = pair_cnt;
    1572     3752997 :       if( !upsert ) return NULL; /* compile time */
    1573     1875972 :       *_insert = 0;
    1574     1875972 :       return &pair[ im ];
    1575     3752997 :     }
    1576     8753439 :     i0 = fd_ulong_if( cmp>0, i0, im+1UL );
    1577     8753439 :     i1 = fd_ulong_if( cmp>0, im, i1     );
    1578             : 
    1579     8753439 :   } while( i1>i0 );
    1580             : 
    1581             :   /* At this point, leaf does not contain key, pairs [0,i0) are before
    1582             :      key, pairs [i0,pair_cnt) are after key and we have room for key.
    1583             :      Move pairs [i0,pair_cnt) right 1 to make room and insert the key at
    1584             :      pair i0. */
    1585             : 
    1586     3771927 :   memmove( pair+i0+1UL, pair+i0, (pair_cnt-i0)*sizeof(BPLUS_PAIR_T) );
    1587     3771927 :   pair[ i0 ].BPLUS_PAIR_KEY = key[0];
    1588     3771927 :   leaf->pair_cnt = pair_cnt + 1UL;
    1589     3771927 :   *_insert = 1;
    1590     3771927 :   return &pair[ i0 ];
    1591     7524924 : }
    1592             : 
    1593             : int
    1594             : BPLUS_(remove_key)( BPLUS_(t)         * join,
    1595     5640936 :                     BPLUS_KEY_T const * key ) {
    1596     5640936 :   BPLUS_(private_t) * bplus = BPLUS_(private)( join );
    1597             :   /* Remove the pair whose key matches key (if any).  Returns 0 if a pair was removed and -1 if key was not found. */
    1598             :   /* If tree is empty, nothing to remove */
    1599             : 
    1600     5640936 :   ulong tree_off = bplus->root_off;
    1601     5640936 :   if( FD_UNLIKELY( !tree_off ) ) return -1; /* not found, optimize for found */
    1602             : 
    1603             :   /* At this point, the tree is not empty.  Find the path through the
    1604             :      tree to the leaf with the key to remove.  Note that 128 is more
    1605             :      than enough given strong lg N depth algorithmic guarantees and wide
    1606             :      radices. */
    1607             : 
    1608     5640840 :   BPLUS_(private_node_t) * path_node    [ 128 ];
    1609     5640840 :   ulong                    path_tree_idx[ 128 ];
    1610     5640840 :   ulong                    path_cnt = 0UL;
    1611             :   /* path_node[i] / path_tree_idx[i] record the interior node visited at depth i and the child index followed from it */
    1612     5640840 :   ulong leaf_lo = bplus->leaf_lo;
    1613    35960184 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* optimize for big trees */
    1614    30319344 :     BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, tree_off );
    1615             : 
    1616    30319344 :     ulong tree_idx = BPLUS_(private_node_query)( node->pivot, node->tree_cnt, key );
    1617             : 
    1618    30319344 :     path_node    [ path_cnt ] = node;
    1619    30319344 :     path_tree_idx[ path_cnt ] = tree_idx;
    1620    30319344 :     path_cnt++;
    1621             : 
    1622    30319344 :     tree_off = node->tree_off[ tree_idx ];
    1623    30319344 :   }
    1624             : 
    1625     5640840 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, tree_off );
    1626             : 
    1627             :   /* At this point, leaf might contain key.  Search for key. */
    1628             : 
    1629     5640840 :   BPLUS_PAIR_T * pair     = leaf->pair;
    1630     5640840 :   ulong          pair_cnt = leaf->pair_cnt;
    1631     5640840 :   ulong          pair_idx = BPLUS_(private_pair_query)( pair, pair_cnt, key );
    1632             :   /* A pair_idx at or past pair_cnt is treated as "key is not in this leaf". */
    1633     5640840 :   if( FD_UNLIKELY( pair_idx>=pair_cnt ) ) return -1; /* not found, optimize for found */
    1634             : 
    1635             :   /* At this point, pair[ pair_idx ] is the pair to remove.  Remove it by shifting the trailing pairs down one slot. */
    1636             : 
    1637     3763473 :   pair_cnt--;
    1638     6947751 :   for( ulong idx=pair_idx; idx<pair_cnt; idx++ ) pair[idx] = pair[idx+1UL];
    1639     3763473 :   leaf->pair_cnt = pair_cnt; /* FIXME: MOVE BELOW? */
    1640             : 
    1641             :   /* At this point, the leaf might be unbalanced but everything else in
    1642             :      the bplus tree is balanced. */
    1643             : 
    1644     3763473 :   if( FD_UNLIKELY( !path_cnt ) ) { /* optimize for big trees */
    1645             : 
    1646             :     /* At this point, we removed a pair from the root leaf and the
    1647             :        leaf's pair_cnt is in [0,pair_max-1] .  If there are still pairs
    1648             :        in the leaf, the bplus tree is still balanced and we are done.
    1649             :        Otherwise, we release the leaf and make an empty bplus tree
    1650             :        (which is balanced by definition). */
    1651             : 
    1652         561 :     if( FD_LIKELY( pair_cnt ) ) return 0; /* optimize for big trees */
    1653         186 :     bplus->root_off     = 0UL;
    1654         186 :     bplus->leaf_min_off = 0UL;
    1655         186 :     bplus->leaf_max_off = 0UL;
    1656         186 :     BPLUS_(private_leaf_release)( bplus, leaf );
    1657         186 :     return 0;
    1658             : 
    1659         561 :   }
    1660             : 
    1661             :   /* At this point, we removed a pair from a non-root leaf and the
    1662             :      leaf's pair_cnt is in [pair_min-1,pair_max-1].  If there are at
    1663             :      least pair_min pairs left in the leaf, the bplus tree is still
    1664             :      balanced and we are done. */
    1665             : 
    1666     3762912 :   ulong pair_min = BPLUS_(private_pair_min)();
    1667     3762912 :   ulong pair_max = BPLUS_(private_pair_max)();
    1668             : 
    1669     3762912 :   if( FD_LIKELY( pair_cnt>=pair_min ) ) return 0; /* optimize for big trees */
    1670             : 
    1671             :   /* At this point, we removed a pair from a non-root leaf and its
    1672             :      pair_cnt is pair_min-1.  As such, it is not balanced with its
    1673             :      siblings (leaf must have at least leaf_min-1 siblings that must
    1674             :      also be leaves with a pair_cnt in [pair_min,pair_max]).  Determine
    1675             :      which sibling to use for rebalancing and how to rebalance with this
    1676             :      sibling.  This sibling will have a pair cnt in [pair_min,pair_max].
    1677             :      NOTE(review): "leaf_min" is not defined in this function; presumably tree_min-1 was meant (confirm).
    1678             :      Note: Could be more adaptive here (e.g. pick the larger sibling
    1679             :      when leaf is a middle child). */
    1680             : 
    1681     1660878 :   path_cnt--;
    1682     1660878 :   BPLUS_(private_node_t) * parent    = path_node    [ path_cnt ];
    1683     1660878 :   ulong                    child_idx = path_tree_idx[ path_cnt ];
    1684             :   /* Pair leaf with its left sibling (sib0), or with its right sibling (sib1) when leaf is the leftmost child. */
    1685     1660878 :   ulong sib0_idx = child_idx - (ulong)(child_idx>0UL);
    1686     1660878 :   ulong sib1_idx = sib0_idx  + 1UL;
    1687             : 
    1688     1660878 :   ulong sib0_off = parent->tree_off[ sib0_idx ];
    1689     1660878 :   ulong sib1_off = parent->tree_off[ sib1_idx ];
    1690             : 
    1691     1660878 :   BPLUS_(private_leaf_t) * sib0 = BPLUS_(private_leaf)( bplus, sib0_off );
    1692     1660878 :   BPLUS_(private_leaf_t) * sib1 = BPLUS_(private_leaf)( bplus, sib1_off );
    1693             : 
    1694     1660878 :   ulong sib0_pair_cnt = sib0->pair_cnt;
    1695     1660878 :   ulong sib1_pair_cnt = sib1->pair_cnt;
    1696             : 
    1697     1660878 :   ulong reb_pair_cnt = sib0_pair_cnt + sib1_pair_cnt; /* in [pair_max-1,2*pair_max-1]. */
    1698     1660878 :   if( FD_LIKELY( reb_pair_cnt>=pair_max ) ) {
    1699             : 
    1700             :     /* At this point, reb_pair_cnt is in [pair_max,2*pair_max-1].
    1701             :        Divide these as evenly as possible between sib0 and sib1 and
    1702             :        update the parent's pivot accordingly.  Since we do not remove
    1703             :        any trees from the parent, this will rebalance the whole bplus
    1704             :        tree fully and we are done. */
    1705             : 
    1706      697206 :     ulong new_sib0_pair_cnt = reb_pair_cnt >> 1;
    1707      697206 :     ulong new_sib1_pair_cnt = reb_pair_cnt - new_sib0_pair_cnt;
    1708             : 
    1709      697206 :     if( new_sib0_pair_cnt>sib0_pair_cnt ) { /* Shift pairs from sib1 into sib0 */
    1710             : 
    1711      167931 :       ulong delta = new_sib0_pair_cnt - sib0_pair_cnt;
    1712      167931 :       memcpy ( sib0->pair + sib0_pair_cnt, sib1->pair,         sizeof(BPLUS_PAIR_T)*delta             );
    1713      167931 :       memmove( sib1->pair,                 sib1->pair + delta, sizeof(BPLUS_PAIR_T)*new_sib1_pair_cnt );
    1714             : 
    1715      529275 :     } else { /* Shift pairs from sib0 into sib1 */
    1716             : 
    1717      529275 :       ulong delta = sib0_pair_cnt - new_sib0_pair_cnt;
    1718      529275 :       memmove( sib1->pair + delta, sib1->pair,                     sizeof(BPLUS_PAIR_T)*sib1_pair_cnt );
    1719      529275 :       memcpy ( sib1->pair,         sib0->pair + new_sib0_pair_cnt, sizeof(BPLUS_PAIR_T)*delta         );
    1720             : 
    1721      529275 :     }
    1722             : 
    1723      697206 :     sib0->pair_cnt = new_sib0_pair_cnt;
    1724      697206 :     sib1->pair_cnt = new_sib1_pair_cnt;
    1725             :     /* The pivot separating sib0 from sib1 becomes sib1's new first key. */
    1726      697206 :     parent->pivot[sib0_idx] = sib1->pair[0].BPLUS_PAIR_KEY;
    1727      697206 :     return 0;
    1728      697206 :   }
    1729             : 
    1730             :   /* At this point, reb_pair_cnt is pair_max-1 such that these siblings
    1731             :      must be merged to restore balance among the leaves.  This might
    1732             :      change the leaf max from sib1 to sib0. */
    1733             :   /* Append sib1's pairs to sib0, then unlink sib1 from the leaf list. */
    1734      963672 :   memcpy( sib0->pair + sib0_pair_cnt, sib1->pair, sizeof(BPLUS_PAIR_T)*sib1_pair_cnt );
    1735      963672 :   sib0->pair_cnt = reb_pair_cnt;
    1736             : 
    1737      963672 :   ulong sib2_off = sib1->next_off;
    1738      963672 :   sib0->next_off = sib2_off;
    1739             : 
    1740             :   /* FIXME: DO BRANCHLESS? */
    1741      963672 :   if( FD_UNLIKELY( !sib2_off ) ) bplus->leaf_max_off                               = sib0_off;
    1742      961158 :   else                           BPLUS_(private_leaf)( bplus, sib2_off )->prev_off = sib0_off;
    1743             : 
    1744      963672 :   BPLUS_(private_child_remove)( parent, sib1_idx );
    1745      963672 :   BPLUS_(private_leaf_release)( bplus, sib1 );
    1746             : 
    1747             :   /* The merge might have unbalanced parent among its siblings.  If it
    1748             :      has not, we are done.  Otherwise, we rebalance parent among its
    1749             :      siblings.  That might unbalance the grandparent among its siblings.
    1750             :      And so on along the path potentially all the way back to the bplus
    1751             :      tree root. */
    1752             : 
    1753      963672 :   ulong tree_min = BPLUS_(private_tree_min)();
    1754      963672 :   ulong tree_max = BPLUS_(private_tree_max)();
    1755             : 
    1756     1193622 :   while( FD_LIKELY( path_cnt ) ) { /* optimize for big trees */
    1757     1189584 :     BPLUS_(private_node_t) * child = parent;
    1758             : 
    1759             :     /* At this point, because we just removed a tree from child, child's
    1760             :        tree_cnt is in [tree_min-1,tree_max-1] but everything else is
    1761             :        balanced.  If the child has at least tree_min trees, the bplus
    1762             :        tree is still balanced. */
    1763             : 
    1764     1189584 :     ulong child_tree_cnt = child->tree_cnt;
    1765     1189584 :     if( FD_LIKELY( child_tree_cnt>=tree_min ) ) return 0; /* optimize for big trees */
    1766             : 
    1767             :     /* At this point, child's tree_cnt is tree_min-1.  As such, it is
    1768             :        not balanced with its siblings (child must have at least
    1769             :        leaf_min-1 siblings that must also be nodes with a tree_cnt in
    1770             :        [tree_min,tree_max]).  Determine which sibling to use for
    1771             :        rebalancing and how to rebalance.
    1772             :        NOTE(review): "leaf_min" is not defined in this function; presumably tree_min-1 was meant (confirm).
    1773             :        Note: Could be more adaptive here (e.g. pick the larger sibling
    1774             :        if a middle child). */
    1775             : 
    1776      410850 :     path_cnt--;
    1777      410850 :     parent    = path_node    [ path_cnt ];
    1778      410850 :     child_idx = path_tree_idx[ path_cnt ];
    1779             :     /* Same sibling choice as for leaves: left sibling, or right sibling when child is the leftmost tree. */
    1780      410850 :     ulong sib0_idx = child_idx - (ulong)(child_idx>0UL);
    1781      410850 :     ulong sib1_idx = sib0_idx  + 1UL;
    1782             : 
    1783      410850 :     ulong sib0_off = parent->tree_off[ sib0_idx ];
    1784      410850 :     ulong sib1_off = parent->tree_off[ sib1_idx ];
    1785             : 
    1786      410850 :     BPLUS_(private_node_t) * sib0 = BPLUS_(private_node)( bplus, sib0_off );
    1787      410850 :     BPLUS_(private_node_t) * sib1 = BPLUS_(private_node)( bplus, sib1_off );
    1788             : 
    1789      410850 :     ulong sib0_tree_cnt = sib0->tree_cnt;
    1790      410850 :     ulong sib1_tree_cnt = sib1->tree_cnt;
    1791             : 
    1792      410850 :     ulong reb_tree_cnt = sib0_tree_cnt + sib1_tree_cnt; /* in [tree_max-1,2*tree_max-1]. */
    1793      410850 :     if( FD_LIKELY( reb_tree_cnt>=tree_max ) ) {
    1794             : 
    1795             :       /* At this point, reb_tree_cnt is in [tree_max,2*tree_max-1].
    1796             :          Divide these as evenly as possible between sib0 and sib1 and
    1797             :          update the parent's pivot accordingly.  Since we do not remove
    1798             :          any trees from parent, this will rebalance the whole bplus tree
    1799             :          and we are done. */
    1800             : 
    1801      180900 :       ulong new_sib0_tree_cnt = reb_tree_cnt >> 1;
    1802      180900 :       ulong new_sib1_tree_cnt = reb_tree_cnt - new_sib0_tree_cnt;
    1803             : 
    1804      180900 :       if( new_sib0_tree_cnt>sib0_tree_cnt ) { /* Shift leading sib1 trees to trailing sib0 trees */
    1805             : 
    1806       43557 :         ulong delta = new_sib0_tree_cnt - sib0_tree_cnt;
    1807       43557 :         memcpy ( sib0->tree_off + sib0_tree_cnt, sib1->tree_off,         sizeof(ulong)*delta             );
    1808       43557 :         memmove( sib1->tree_off,                 sib1->tree_off + delta, sizeof(ulong)*new_sib1_tree_cnt );
    1809             : 
    1810             :         /* Copy parent pivot and leading delta-1 sib1 pivots into sib0. */
    1811             : 
    1812       43557 :         sib0->pivot[ sib0_tree_cnt-1UL ] = parent->pivot[ sib0_idx ];
    1813       43557 :         memcpy( sib0->pivot + sib0_tree_cnt, sib1->pivot, (delta-1UL)*sizeof(BPLUS_KEY_T) );
    1814             : 
    1815             :         /* At this point, there is 1 hole in the parent pivots and
    1816             :            delta-1 holes in the leading sib1 pivots.  Copy the next sib1
    1817             :            pivot to the parent. */
    1818             : 
    1819       43557 :         parent->pivot[ sib0_idx ] = sib1->pivot[ delta-1UL ];
    1820             : 
    1821             :         /* At this point, there are delta holes in the leading sib1
    1822             :            pivots.  Shift remaining sib1 pivots down delta. */
    1823             : 
    1824       43557 :         memmove( sib1->pivot, sib1->pivot+delta, (new_sib1_tree_cnt-1UL)*sizeof(BPLUS_KEY_T) );
    1825             : 
    1826      137343 :       } else { /* Shift trailing sib0 trees to leading sib1 trees */
    1827             : 
    1828      137343 :         ulong delta = sib0_tree_cnt - new_sib0_tree_cnt;
    1829      137343 :         memmove( sib1->tree_off + delta, sib1->tree_off,                     sizeof(ulong)*sib1_tree_cnt );
    1830      137343 :         memcpy ( sib1->tree_off,         sib0->tree_off + new_sib0_tree_cnt, sizeof(ulong)*delta         );
    1831             : 
    1832             :         /* Shift sib1 pivots up delta. */
    1833             : 
    1834      137343 :         memmove( sib1->pivot+delta, sib1->pivot, (sib1_tree_cnt-1UL)*sizeof(BPLUS_KEY_T) );
    1835             : 
    1836             :         /* At this point, there are delta holes in the leading sib1
    1837             :            pivots.  Copy trailing delta-1 sib0 pivots and parent pivot
    1838             :            into sib1. */
    1839             : 
    1840      137343 :         memcpy( sib1->pivot, sib0->pivot+new_sib0_tree_cnt, (delta-1UL)*sizeof(BPLUS_KEY_T) );
    1841      137343 :         sib1->pivot[ delta-1UL ] = parent->pivot[ sib0_idx ];
    1842             : 
    1843             :         /* At this point, there is 1 hole in the parent pivot.  Copy
    1844             :            trailing sib0 pivot into parent. */
    1845             : 
    1846      137343 :         parent->pivot[ sib0_idx ] = sib0->pivot[ new_sib0_tree_cnt-1UL ];
    1847             : 
    1848      137343 :       }
    1849             : 
    1850      180900 :       sib0->tree_cnt = new_sib0_tree_cnt;
    1851      180900 :       sib1->tree_cnt = new_sib1_tree_cnt;
    1852      180900 :       return 0;
    1853      180900 :     }
    1854             : 
    1855             :     /* At this point, reb_tree_cnt is tree_max-1 such that these
    1856             :        siblings must be merged to restore balance among siblings.  Since
    1857             :        this might unbalance parent relative to its siblings, we need to
    1858             :        keep iterating. */
    1859             : 
    1860      229950 :     memcpy( sib0->tree_off + sib0_tree_cnt, sib1->tree_off, sizeof(ulong)*sib1_tree_cnt );
    1861             :     /* The parent pivot between sib0 and sib1 fills the pivot slot between sib0's old trees and the appended sib1 trees. */
    1862      229950 :     sib0->pivot[ sib0_tree_cnt-1UL ] = parent->pivot[ sib0_idx ];
    1863      229950 :     memcpy( sib0->pivot + sib0_tree_cnt, sib1->pivot, sizeof(BPLUS_KEY_T)*(sib1_tree_cnt-1UL) );
    1864             : 
    1865      229950 :     sib0->tree_cnt = reb_tree_cnt;
    1866             : 
    1867      229950 :     BPLUS_(private_child_remove)( parent, sib1_idx );
    1868             : 
    1869      229950 :     BPLUS_(private_node_release)( bplus, sib1 );
    1870      229950 :   }
    1871             : 
    1872             :   /* At this point, parent is the root node and we just removed a tree
    1873             :      from it.  If parent still has more than 1 tree, the bplus tree is
    1874             :      balanced and we are done.  Otherwise, we make parent's sole child
    1875             :      the new root and release parent to finish balancing the tree. */
    1876             : 
    1877        4038 :   if( FD_LIKELY( parent->tree_cnt>1UL ) ) return 0; /* optimize for big trees */
    1878             : 
    1879         855 :   bplus->root_off = parent->tree_off[ 0 ];
    1880         855 :   BPLUS_(private_node_release)( bplus, parent );
    1881         855 :   return 0;
    1882        4038 : }
    1883             : 
    1884             : BPLUS_(iter_t)
    1885             : BPLUS_(private_iter)( BPLUS_(t)   const * join,
    1886             :                       BPLUS_KEY_T const * query,
    1887    14996460 :                       int                 op ) {
    1888    14996460 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
    1889             :   /* Position an iterator relative to query.  op selects the relation: 0 (GE), 1 (GT), 2 (LE), 3 (LT). */
    1890    14996460 :   BPLUS_(iter_t) iter;
    1891             : 
    1892             :   /* If the bplus is empty or query is NULL, return nul (leaf_off and pair_idx both 0) */
    1893             : 
    1894    14996460 :   ulong tree_off = bplus->root_off;
    1895    14996460 :   if( FD_UNLIKELY( (!tree_off) | (!query) ) ) { /* empty, optimize for big trees */
    1896         348 :     iter.leaf_off = 0UL;
    1897         348 :     iter.pair_idx = 0UL;
    1898         348 :     return iter;
    1899         348 :   }
    1900             : 
    1901             :   /* At this point, the bplus is not empty.  Find the leaf that might
    1902             :      contain query. */
    1903             : 
    1904    14996112 :   ulong leaf_lo = bplus->leaf_lo;
    1905    95611296 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* Optimize for big trees */
    1906    80615184 :     BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    1907    80615184 :     tree_off = node->tree_off[ BPLUS_(private_node_query)( node->pivot, node->tree_cnt, query ) ];
    1908    80615184 :   }
    1909    14996112 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, tree_off );
    1910             : 
    1911             :   /* At this point, pairs in the previous leaf (if any) have keys less
    1912             :      than query and pairs in the next leaf (if any) have keys greater
    1913             :      than query.  Search the leaf for query. */
    1914             : 
    1915    14996112 :   BPLUS_PAIR_T const * pair     = leaf->pair;
    1916    14996112 :   ulong                pair_cnt = leaf->pair_cnt;
    1917             : 
    1918    14996112 :   ulong i0 = 0UL;
    1919    14996112 :   ulong i1 = pair_cnt;
    1920             : 
    1921    23399328 :   do {
    1922             : 
    1923             :     /* At this point, the range [i0,i1) contains at least 1 pair.  Pairs
    1924             :        [0,i0) have keys less than query, pairs [i1,pair_cnt) have keys
    1925             :        greater than query and we don't know about pairs [i0,i1).  Test
    1926             :        the pair in the middle.
    1927             : 
    1928             :        If this pair's key matches query, because all keys are unique, we
    1929             :        know that pair im is the first pair greater than or equal to
    1930             :        query and that pair im+1 is the first pair greater than query.
    1931             : 
    1932             :        If this pair's key is greater than query, we know all pairs in
    1933             :        [im,pair_cnt) are greater than query so we update i1 to im.
    1934             : 
    1935             :        If this pair's key is less than query, we know that all pairs in
    1936             :        [0,im+1) are less than query so we update i0 to im+1. */
    1937             : 
    1938    23399328 :     ulong im = (i0+i1) >> 1; /* No overflow */
    1939             : 
    1940    23399328 :     int cmp = BPLUS_(private_key_cmp)( &pair[im].BPLUS_PAIR_KEY, query );
    1941    23399328 :     if( FD_UNLIKELY( !cmp ) ) { /* optimize for big trees */
    1942             : 
    1943             :       /* At this point, pairs [0,im) have keys less than query, pair im
    1944             :          key matches query and pairs (im,pair_cnt) are greater than
    1945             :          query.  If:
    1946             : 
    1947             :            op==0 (GE): pick i0 == im   such that [0,i0) are <  query and [i0,pair_cnt) are >= query
    1948             :            op==1 (GT): pick i0 == im+1 such that [0,i0) are <= query and [i0,pair_cnt) are >  query
    1949             :            op==2 (LE): pick i0 == im+1 such that [0,i0) are <= query and [i0,pair_cnt) are >  query
    1950             :            op==3 (LT): pick i0 == im   such that [0,i0) are <  query and [i0,pair_cnt) are >= query */
    1951             : 
    1952     7494132 :       i0 = im + (ulong)((op==1) | (op==2)); /* compile time */
    1953     7494132 :       break;
    1954     7494132 :     }
    1955    15905196 :     i0 = fd_ulong_if( cmp>0, i0, im+1UL );
    1956    15905196 :     i1 = fd_ulong_if( cmp>0, im, i1     );
    1957             : 
    1958    15905196 :   } while( FD_LIKELY( i1-i0 ) ); /* optimize for big trees */
    1959             : 
    1960             :   /* At this point:
    1961             : 
    1962             :        op==0 (GE): pairs [i0,pair_cnt) have keys greater than or equal to query
    1963             :        op==1 (GT): pairs [i0,pair_cnt) have keys greater than             query
    1964             :        op==2 (LE): pairs [0,i0)        have keys less    than or equal to query
    1965             :        op==3 (LT): pairs [0,i0)        have keys less    than             query */
    1966             : 
    1967    14996112 :   if( op<=1 ) { /* compile time */
    1968             : 
    1969     7498056 :     if( FD_UNLIKELY( i0==pair_cnt ) ) { /* optimize for big trees */
    1970             : 
    1971             :       /* At this point:
    1972             : 
    1973             :            op==0 (GE): all pairs have keys less than             query and pairs in any next leaf have keys greater than query
    1974             :            op==1 (GT): all pairs have keys less than or equal to query and pairs in any next leaf have keys greater than query
    1975             : 
    1976             :          position iterator at first pair in next leaf (or nul if this is
    1977             :          the max leaf). */
    1978             : 
    1979     4491171 :       tree_off = leaf->next_off;
    1980     4491171 :       i0       = 0UL;
    1981     4491171 :     }
    1982             : 
    1983     7498056 :   } else {
    1984             : 
    1985     7498056 :     if( FD_UNLIKELY( i0==0UL ) ) { /* optimize for big trees */
    1986             : 
    1987             :       /* At this point:
    1988             : 
    1989             :            op==2 (LE): all pairs have keys greater than             query and pairs in any prev leaf have keys less than query
    1990             :            op==3 (LT): all pairs have keys greater than or equal to query and pairs in any prev leaf have keys less than query
    1991             : 
    1992             :          position iterator at last pair in previous leaf (or nul if this
    1993             :          is the min leaf). */
    1994             : 
    1995      742896 :       tree_off = leaf->prev_off;
    1996      742896 :       i0       = FD_UNLIKELY( !tree_off ) ? 1UL : BPLUS_(private_leaf_const)( bplus, tree_off )->pair_cnt;
    1997      742896 :     }
    1998     7498056 :     i0--; /* i0 was an exclusive upper bound; step back to the last qualifying pair (the 1UL above yields 0 for nul) */
    1999             : 
    2000     7498056 :   }
    2001             :   /* tree_off is now the leaf holding the result pair, or 0 (nul) if there was no next / previous leaf to move to. */
    2002    14996112 :   iter.leaf_off = tree_off;
    2003    14996112 :   iter.pair_idx = i0;
    2004    14996112 :   return iter;
    2005    14996460 : }
    2006             : 
    2007             : int
    2008     1873254 : BPLUS_(verify)( BPLUS_(t) const * join ) {
    2009             : 
    2010 55111332036 : # define BPLUS_TEST(c) do {               \
    2011 52928495214 :     if( FD_UNLIKELY( !(c) ) ) {           \
    2012           0 :       FD_LOG_WARNING(( "FAIL: %s", #c )); \
    2013           0 :       return -1;                          \
    2014           0 :     }                                     \
    2015 52928495214 :   } while(0)
    2016             : 
    2017             :   /* Verify join */
    2018             : 
    2019     1873254 :   BPLUS_TEST( join );
    2020             : 
    2021     1873254 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
    2022             : 
    2023     1873254 :   BPLUS_TEST( fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) );
    2024             : 
    2025             :   /* Verify header */
    2026             : 
    2027     1873254 :   BPLUS_TEST( bplus->magic==BPLUS_MAGIC );
    2028             : 
    2029     1873254 :   ulong node_max = bplus->node_max;
    2030     1873254 :   ulong leaf_max = bplus->leaf_max;
    2031             : 
    2032     1873254 :   BPLUS_TEST( node_max<=BPLUS_(private_node_max_max)() );
    2033     1873254 :   BPLUS_TEST( leaf_max<=BPLUS_(private_leaf_max_max)() );
    2034             : 
    2035     1873254 :   ulong node_lo = bplus->node_lo;
    2036     1873254 :   ulong leaf_lo = bplus->leaf_lo;
    2037             : 
    2038     1873254 :   BPLUS_TEST( node_lo==fd_ulong_align_up(                    sizeof( BPLUS_(private_t)      ), BPLUS_NODE_ALIGN ) );
    2039     1873254 :   BPLUS_TEST( leaf_lo==fd_ulong_align_up( node_lo + node_max*sizeof( BPLUS_(private_node_t) ), BPLUS_LEAF_ALIGN ) );
    2040             : 
    2041     1873254 :   ulong node_hi = node_lo + node_max*sizeof( BPLUS_(private_node_t) );
    2042     1873254 :   ulong leaf_hi = leaf_lo + leaf_max*sizeof( BPLUS_(private_leaf_t) );
    2043             : 
    2044     1873254 :   ulong root_off     = bplus->root_off;
    2045     1873254 :   ulong leaf_min_off = bplus->leaf_min_off;
    2046     1873254 :   ulong leaf_max_off = bplus->leaf_max_off;
    2047             : 
    2048     1873254 :   if( FD_LIKELY( root_off ) ) {
    2049             : 
    2050     1873170 :     BPLUS_TEST( node_lo<=root_off ); BPLUS_TEST( root_off<leaf_hi  );
    2051     1873170 :     BPLUS_TEST( fd_ulong_is_aligned( root_off, fd_ulong_if( !BPLUS_(private_is_leaf)( root_off, leaf_lo ),
    2052     1873170 :                                                             BPLUS_NODE_ALIGN, BPLUS_LEAF_ALIGN ) ) );
    2053             : 
    2054     1873170 :     BPLUS_TEST( leaf_lo<=leaf_min_off ); BPLUS_TEST( leaf_min_off<leaf_hi  );
    2055     1873170 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_min_off, BPLUS_LEAF_ALIGN ) );
    2056             : 
    2057     1873170 :     BPLUS_TEST( leaf_lo<=leaf_max_off ); BPLUS_TEST( leaf_max_off<leaf_hi  );
    2058     1873170 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_max_off, BPLUS_LEAF_ALIGN ) );
    2059             : 
    2060     1873170 :   } else {
    2061             : 
    2062          84 :     BPLUS_TEST( !leaf_min_off );
    2063          84 :     BPLUS_TEST( !leaf_max_off );
    2064             : 
    2065          84 :   }
    2066             : 
    2067     1873254 :   ulong node_rem = bplus->node_max;
    2068     1873254 :   ulong leaf_rem = bplus->leaf_max;
    2069             : 
    2070             :   /* Verify node pool */
    2071             : 
    2072     1873254 :   ulong node_off = bplus->node_pool_off;
    2073  1336552995 :   while( FD_LIKELY( node_off ) ) {
    2074  1334679741 :     BPLUS_TEST( node_rem ); node_rem--;
    2075  1334679741 :     BPLUS_TEST( node_lo<=node_off ); BPLUS_TEST( node_off<node_hi  );
    2076  1334679741 :     BPLUS_TEST( fd_ulong_is_aligned( node_off, BPLUS_NODE_ALIGN ) );
    2077  1334679741 :     node_off = BPLUS_(private_node_const)( bplus, node_off )->tree_off[0];
    2078  1334679741 :   }
    2079             : 
    2080             :   /* Verify leaf pool */
    2081             : 
    2082     1873254 :   ulong leaf_off = bplus->leaf_pool_off;
    2083  2242739487 :   while( FD_LIKELY( leaf_off ) ) {
    2084  2240866233 :     BPLUS_TEST( leaf_rem ); leaf_rem--;
    2085  2240866233 :     BPLUS_TEST( leaf_lo<=leaf_off ); BPLUS_TEST( leaf_off<leaf_hi  );
    2086  2240866233 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_off, BPLUS_LEAF_ALIGN ) );
    2087  2240866233 :     leaf_off = BPLUS_(private_leaf_const)( bplus, leaf_off )->next_off;
    2088  2240866233 :   }
    2089             : 
    2090             :   /* Verify the actual tree */
    2091             : 
    2092     1873254 :   ulong leaf_cnt = leaf_rem;
    2093             : 
    2094     1873254 :   if( FD_LIKELY( root_off ) ) { /* optimize for big trees */
    2095             : 
    2096             :     /* At this point, the tree is not empty */
    2097             : 
    2098     1873170 :     ulong tree_min = BPLUS_(private_tree_min)();
    2099     1873170 :     ulong tree_max = BPLUS_(private_tree_max)();
    2100             : 
    2101     1873170 :     ulong pair_min = BPLUS_(private_pair_min)();
    2102     1873170 :     ulong pair_max = BPLUS_(private_pair_max)();
    2103             : 
    2104     1873170 :     ulong               stack_tree_off   [ 128 ];
    2105     1873170 :     ulong               stack_subtree_idx[ 128 ];
    2106     1873170 :     BPLUS_KEY_T const * stack_key_lo     [ 128 ];
    2107     1873170 :     BPLUS_KEY_T const * stack_key_hi     [ 128 ];
    2108     1873170 :     ulong               stack_cnt = 0UL;
    2109     1873170 :     ulong               stack_max = 128UL;
    2110             : 
    2111     1873170 :     ulong               tree_off    = root_off;
    2112     1873170 :     ulong               subtree_idx = 0UL;
    2113     1873170 :     BPLUS_KEY_T const * key_lo      = NULL;
    2114     1873170 :     BPLUS_KEY_T const * key_hi      = NULL;
    2115             : 
    2116  3776521611 :     for(;;) {
    2117             : 
    2118             :       /* At this point, we are still validating the tree rooted at
    2119             :          tree_off and this tree should contain only keys in
    2120             :          [key_lo,key_hi).  key_{lo,hi}==NULL indicates key_{lo,hi} is
    2121             :          {-inf,+inf}.
    2122             : 
    2123             :          If tree is a node, we've validated all of tree's subtrees
    2124             :          [0,subtree_idx).  subtree_idx==0 indicates this is the first
    2125             :          time we've visited this node.
    2126             : 
    2127             :          If tree is a leaf, as we only visit each leaf exactly once,
    2128             :          subtree_idx will be zero (and otherwise ignored). */
    2129             : 
    2130  3776521611 :       if( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* tree is a node */
    2131             : 
    2132             :         /* If this is the first time visiting this node, validate it */
    2133             : 
    2134  2180963652 :         if( FD_UNLIKELY( !subtree_idx ) ) {
    2135             : 
    2136             :           /* Validate no loops */
    2137             : 
    2138   587278863 :           BPLUS_TEST( node_rem ); node_rem--;
    2139             : 
    2140             :           /* Validate the node pointer */
    2141             : 
    2142   587278863 :           BPLUS_TEST( node_lo<=tree_off ); BPLUS_TEST( tree_off<node_hi );
    2143   587278863 :           BPLUS_TEST( fd_ulong_is_aligned( tree_off, BPLUS_NODE_ALIGN ) );
    2144             : 
    2145   587278863 :           BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    2146             : 
    2147   587278863 :           BPLUS_KEY_T const * subtree_pivot = node->pivot;
    2148   587278863 :           ulong       const * subtree_off   = node->tree_off;
    2149   587278863 :           ulong               subtree_cnt   = node->tree_cnt;
    2150             : 
    2151             :           /* Validate the node tree count */
    2152             : 
    2153   587278863 :           BPLUS_TEST( fd_ulong_if( tree_off!=root_off, tree_min, 2UL )<=subtree_cnt );
    2154   587278863 :           BPLUS_TEST( subtree_cnt<=tree_max );
    2155             : 
    2156             :           /* Validate the node tree offsets */
    2157             : 
    2158   587278863 :           int is_leaf = BPLUS_(private_is_leaf)( subtree_off[0], leaf_lo );
    2159             : 
    2160   587278863 :           ulong lo    = fd_ulong_if( is_leaf, leaf_lo,          node_lo          );
    2161   587278863 :           ulong hi    = fd_ulong_if( is_leaf, leaf_hi,          node_hi          );
    2162   587278863 :           ulong align = fd_ulong_if( is_leaf, BPLUS_LEAF_ALIGN, BPLUS_NODE_ALIGN );
    2163             : 
    2164  2768242515 :           for( ulong idx=0UL; idx<subtree_cnt; idx++ ) {
    2165  2180963652 :             ulong off = subtree_off[ idx ];
    2166  2180963652 :             BPLUS_TEST( lo<=off ); BPLUS_TEST( off<hi );
    2167  2180963652 :             BPLUS_TEST( fd_ulong_is_aligned( off, align ) );
    2168  2180963652 :           }
    2169             : 
    2170             :           /* Validate the node pivots */
    2171             : 
    2172   587278863 :           if( FD_LIKELY( key_lo ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( key_lo, &subtree_pivot[0] )<0 );
    2173             : 
    2174  1593684789 :           for( ulong idx=1UL; idx<subtree_cnt-1UL; idx++ )
    2175  1006405926 :             BPLUS_TEST( BPLUS_(private_key_cmp)( &subtree_pivot[idx-1UL], &subtree_pivot[idx] )<0 );
    2176             : 
    2177   587278863 :           if( FD_LIKELY( key_hi ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( &subtree_pivot[subtree_cnt-2UL], key_hi )<0 );
    2178   587278863 :         }
    2179             : 
    2180             :         /* At this point, tree_off is a bplus global offset of a
    2181             :            verified node (verified either just now or on a previous
    2182             :            iteration).  If subtree_idx isn't the last subtree, push
    2183             :            subtree_idx+1 onto the stack for a later iteration. */
    2184             : 
    2185  2180963652 :         BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    2186             : 
    2187  2180963652 :         BPLUS_KEY_T const * subtree_pivot = node->pivot;
    2188  2180963652 :         ulong       const * subtree_off   = node->tree_off;
    2189  2180963652 :         ulong               subtree_cnt   = node->tree_cnt;
    2190             : 
    2191  2180963652 :         if( FD_LIKELY( (subtree_idx+1UL)<subtree_cnt ) ) {
    2192  1593684789 :           BPLUS_TEST( stack_cnt<stack_max );
    2193  1593684789 :           stack_tree_off   [ stack_cnt ] = tree_off;
    2194  1593684789 :           stack_subtree_idx[ stack_cnt ] = subtree_idx+1UL;
    2195  1593684789 :           stack_key_lo     [ stack_cnt ] = key_lo;
    2196  1593684789 :           stack_key_hi     [ stack_cnt ] = key_hi;
    2197  1593684789 :           stack_cnt++;
    2198  1593684789 :         }
    2199             : 
    2200             :         /* And recurse into subtree_idx for the next iteration.  Note
    2201             :            this node's key_lo is subtree_idx 0's key_lo and this node's
    2202             :            key_hi is subtree_idx tree_cnt-1's key_hi. */
    2203             : 
    2204  2180963652 :         /**/                                           tree_off =  subtree_off  [ subtree_idx     ];
    2205  2180963652 :         if( FD_LIKELY( subtree_idx>0UL             ) ) key_lo   = &subtree_pivot[ subtree_idx-1UL ];
    2206  2180963652 :         if( FD_LIKELY( subtree_idx<subtree_cnt-1UL ) ) key_hi   = &subtree_pivot[ subtree_idx     ];
    2207  2180963652 :         subtree_idx = 0UL;
    2208  2180963652 :         continue;
    2209  2180963652 :       }
    2210             : 
    2211             :       /* At this point, tree is a leaf.  Validate no loops. */
    2212             : 
    2213  1595557959 :       BPLUS_TEST( leaf_rem ); leaf_rem--;
    2214             : 
    2215             :       /* Validate the leaf pointer */
    2216             : 
    2217  1595557959 :       BPLUS_TEST( leaf_lo<=tree_off ); BPLUS_TEST( tree_off<leaf_hi );
    2218  1595557959 :       BPLUS_TEST( fd_ulong_is_aligned( tree_off, BPLUS_LEAF_ALIGN ) );
    2219             : 
    2220  1595557959 :       BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, tree_off );
    2221             : 
    2222  1595557959 :       BPLUS_PAIR_T const * pair     = leaf->pair;
    2223  1595557959 :       ulong                pair_cnt = leaf->pair_cnt;
    2224             : 
    2225             :       /* Validate the leaf pair count */
    2226             : 
    2227  1595557959 :       BPLUS_TEST( fd_ulong_if( tree_off!=root_off, pair_min, 1UL )<=pair_cnt );
    2228  1595557959 :       BPLUS_TEST( pair_cnt<=pair_max );
    2229             : 
    2230             :       /* Validate the leaf pairs */
    2231             : 
    2232  1595557959 :       if( FD_LIKELY( key_lo ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( key_lo, &pair[0].BPLUS_PAIR_KEY )<=0 );
    2233             : 
    2234  4031130432 :       for( ulong idx=1UL; idx<pair_cnt; idx++ )
    2235  2435572473 :         BPLUS_TEST( BPLUS_(private_key_cmp)( &pair[idx-1UL].BPLUS_PAIR_KEY, &pair[idx].BPLUS_PAIR_KEY )<0 );
    2236             : 
    2237  1595557959 :       if( FD_LIKELY( key_hi ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( &pair[ pair_cnt-1UL ].BPLUS_PAIR_KEY, key_hi )<0 );
    2238             : 
    2239             :       /* (Note that we validate the leaf ordered iterator below.) */
    2240             : 
    2241             :       /* If no more work to do, abort.  Otherwise, get the next node to
    2242             :          process. */
    2243             : 
    2244  1595557959 :       if( FD_UNLIKELY( !stack_cnt ) ) break;
    2245  1593684789 :       stack_cnt--;
    2246  1593684789 :       tree_off    = stack_tree_off   [ stack_cnt ];
    2247  1593684789 :       subtree_idx = stack_subtree_idx[ stack_cnt ];
    2248  1593684789 :       key_lo      = stack_key_lo     [ stack_cnt ];
    2249  1593684789 :       key_hi      = stack_key_hi     [ stack_cnt ];
    2250  1593684789 :     }
    2251     1873170 :   }
    2252             : 
    2253             :   /* Validate all nodes and leaves touched */
    2254             : 
    2255     1873254 :   BPLUS_TEST( !node_rem );
    2256     1873254 :   BPLUS_TEST( !leaf_rem );
    2257             : 
    2258             :   /* Validate leaf iteration */
    2259             : 
    2260     1873254 :   leaf_rem = leaf_cnt;
    2261             : 
    2262     1873254 :   ulong leaf_prev_off = 0UL;
    2263     1873254 :   /**/  leaf_off      = bplus->leaf_min_off;
    2264  1597431213 :   while( leaf_off ) { /* Validates leaf->next_off for last iteration */
    2265             : 
    2266             :     /* Validate no loops */
    2267             : 
    2268  1595557959 :     BPLUS_TEST( leaf_rem ); leaf_rem--;
    2269             : 
    2270             :     /* Validate forward iteration (validates bplus->leaf_min_off first
    2271             :        iteration, validates leaf->next_off interior iterations) */
    2272             : 
    2273  1595557959 :     BPLUS_TEST( leaf_lo<=leaf_off ); BPLUS_TEST( leaf_off<leaf_hi );
    2274  1595557959 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_off, BPLUS_LEAF_ALIGN ) );
    2275  1595557959 :     BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, leaf_off );
    2276             : 
    2277             :     /* Validate reverse iteration (validates leaf->prev_off,
    2278             :        bplus->leaf_max_off validated below) */
    2279             : 
    2280  1595557959 :     BPLUS_TEST( leaf->prev_off==leaf_prev_off );
    2281             : 
    2282             :     /* Validate ordered leaves */
    2283             : 
    2284  1595557959 :     if( FD_LIKELY( leaf_prev_off ) ) {
    2285  1593684789 :       BPLUS_(private_leaf_t) const * prev = BPLUS_(private_leaf_const)( bplus, leaf_prev_off );
    2286  1593684789 :       BPLUS_TEST( BPLUS_(private_key_cmp)( &prev->pair[ prev->pair_cnt-1UL ].BPLUS_PAIR_KEY, &leaf->pair[ 0 ].BPLUS_PAIR_KEY )<0 );
    2287  1593684789 :     }
    2288             : 
    2289  1595557959 :     leaf_prev_off = leaf_off;
    2290  1595557959 :     leaf_off      = leaf->next_off;
    2291  1595557959 :   }
    2292             : 
    2293     1873254 :   BPLUS_TEST( bplus->leaf_max_off==leaf_prev_off ); /* Validates bplus->leaf_max_off */
    2294     1873254 :   BPLUS_TEST( !leaf_rem );                          /* All leaves in tree covered */
    2295             : 
    2296     1873254 : # undef BPLUS_TEST
    2297             : 
    2298     1873254 :   return 0;
    2299     1873254 : }
    2300             : 
    2301             : #endif
    2302             : /* Undefine the BPLUS_* macros listed below so they do not leak past this point. */
    2303             : #undef BPLUS_STATIC
    2304             : #undef BPLUS_
    2305             : /* NOTE(review): the names below look like the instantiation parameters of this template -- confirm against the file header. */
    2306             : #undef BPLUS_IMPL_STYLE
    2307             : #undef BPLUS_MAGIC
    2308             : #undef BPLUS_LEAF_ALIGN
    2309             : #undef BPLUS_NODE_ALIGN
    2310             : #undef BPLUS_ALIGN
    2311             : #undef BPLUS_TREE_MAX
    2312             : #undef BPLUS_NODE_MAX
    2313             : #undef BPLUS_PAIR_T
    2314             : #undef BPLUS_KEY_T
    2315             : #undef BPLUS_NAME

Generated by: LCOV version 1.14