LCOV - code coverage report
Current view: top level - util/tmpl - fd_bplus.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 758 767 98.8 %
Date: 2026-02-25 06:06:56 Functions: 61 61 100.0 %

          Line data    Source code
       1             : /* Generate prototypes, inlines and/or implementations for an ultra high
       2             :    performance bplus-tree-based key-val store.  A bplus tree can be
       3             :    persisted beyond the lifetime of creating process, used concurrently,
       4             :    used IPC, relocated in memory, naively serialized/deserialized and/or
       5             :    moved between hosts.  Virtually all operations on a bplus tree are a
       6             :    fast O(lg N) (where N is the number of elements stored) or better in
       7             :    worst case.
       8             : 
       9             :    At its core, this is fast binary search on a sorted array.  But the
      10             :    sorted array has been partition into leaves where each leaf is
      11             :    responsible for a continuous disjoint portion of the key space and
      12             :    union of the ranges covered by the leaves covers the entire key
      13             :    space.  The leaves are stored in a tree whose nodes have a large and
      14             :    flexible number of branches per each node that specify how leaves
      15             :    completely partition key space.  Further, to support fast forward and
      16             :    reverse iteration, the leaves are organized into a sorted doubly
      17             :    linked list.  Lastly, the interior nodes and leaves are guaranteed to
      18             :    be full enough that query has a fast O(lg N) worst case and have
      19             :    enough slack that insert / upsert / remove also have fast O(lg N)
      20             :    worst case.
      21             : 
      22             :    This leads to a number of improvements over textbook bplus trees,
      23             :    including:
      24             : 
      25             :    - Removal doesn't require nearly as much reshuffling of the interior
      26             :      nodes.  The only requirement here is that interior nodes form a
      27             :      complete partitioning of the key space.  (There is no requirement
      28             :      that interior nodes store copy of keys found in leaf nodes.)
      29             : 
      30             :    - No extra storage in the node is required for identifying whether or
      31             :      not an interior node or a leaf.
      32             : 
      33             :    - The leaf pair max radix can be independently tuned from the
      34             :      interior node pair radix (especially useful if sizing interior
      35             :      nodes and leaves to match things like memory page sizes).
      36             : 
      37             :    - Supports fast reverse iteration.
      38             : 
      39             :    Typical usage:
      40             : 
      41             :      struct mypair {
      42             :        mykey_t key;
      43             : 
      44             :        ... key can be located arbitrarily in struct (and renamed if
      45             :        ... needed ... see BPLUS_PAIR_KEY).  It is managed by the bplus
      46             :        ... tree and should not be modified).
      47             : 
      48             :        ... IMPORTANT SAFETY TIP!  The location of a pair can be changed
      49             :        ... by insert / upsert / remove operations.
      50             : 
      51             :      };
      52             : 
      53             :      typedef struct mypair mypair_t;
      54             : 
      55             :      #define BPLUS_NAME         mybplus
      56             :      #define BPLUS_KEY_T        mykey_t
      57             :      #define BPLUS_PAIR_T       mypair_t
      58             :      #define BPLUS_KEY_CMP(a,b) mykeycmp( (a), (b) )
      59             :      #include "fd_bplus.c"
      60             : 
      61             :    will provide the following APIs as a header only style library in the
      62             :    compilation unit:
      63             : 
      64             :      // A myplus_t is an opaque handle to a join to a bplus tree
      65             : 
      66             :      struct mybplus_private;
      67             :      typedef struct mybplus_private bplus_t;
      68             : 
      69             :      // A myplus_iter_t is an opaque handle to a bplus tree iterator
      70             : 
      71             :      struct mybplus_private_iter;
      72             :      typedef struct mybplus_private_iter mybplus_iter_t;
      73             : 
      74             :      // Constructors
      75             : 
      76             :      // mybplus_{leaf,node}_max_est returns a conservative estimate of
      77             :      // the number of {leaves,nodes} needed for a worst case bplus tree
      78             :      // containing ele_max_est elements.
      79             : 
      80             :      ulong mybplus_leaf_max_est( ulong ele_max_est );
      81             :      ulong mybplus_node_max_est( ulong ele_max_est );
      82             : 
      83             :      // mybplus_{align,footprint,new,join,leave,delete} have the usual
      84             :      // persistent IPC object constructors / destructors semantics.
      85             : 
      86             :      ulong mybplus_align    ( void );
      87             :      ulong mybplus_footprint( ulong node_max, ulong leaf_max );
      88             : 
      89             :      void      * mybplus_new   ( void * shmem, ulong node_max, ulong leaf_max );
      90             :      mybplus_t * mybplus_join  ( void * shbplus );
      91             :      void      * mybplus_leave ( mybplus_t * join );
      92             :      void      * mybplus_delete( void * shbplus );
      93             : 
      94             :      // Accessors
      95             : 
      96             :      // mybplus_{node,leaf}_max return the {node,leaf}_max values used
      97             :      // to construct the bplus tree.  Assumes join is a current local
      98             :      // join.  Fast O(1) worst case.
      99             : 
     100             :      ulong mybplus_node_max( mybplus_t const * join );
     101             :      ulong mybplus_leaf_max( mybplus_t const * join );
     102             : 
     103             :      // mybplus_is_empty returns 1 if the bplus tree contains no pairs
     104             :      // and 0 otherwise.  Assumes join is a current local join.  Fast
     105             :      // O(1) worst case.
     106             : 
     107             :      int mybplus_is_empty( mybplus_t const * join );
     108             : 
     109             :      // mybplus_{min,max} return the pointer in the caller's local
     110             :      // address space to the pair in the bplus tree with the {min,max}
     111             :      // key.  Assumes join is a current local join and bplus tree is not
     112             :      // empty.  The lifetime of the returned pointer is the lesser of
     113             :      // the lifetime of the local join or the next insert / upsert /
     114             :      // remove operation on the bplus tree.  The bplus tree retains
     115             :      // ownership of the returned pair and the caller should not modify
     116             :      // the pair key field.  Fast O(1) worst case.
     117             :      //
     118             :      // mybplus_{min,max}_const is a const-correct version.
     119             : 
     120             :      mypair_t const * mybplus_min_const( mybplus_t const * join );
     121             :      mypair_t const * mybplus_max_const( mybplus_t const * join );
     122             :      mypair_t       * mybplus_min      ( mybplus_t       * join );
     123             :      mypair_t       * mybplus_max      ( mybplus_t       * join );
     124             : 
     125             :      // mybplus_query returns the pointer in the caller's local address
     126             :      // space to the pair in the bplus tree that matches the key pointed
     127             :      // to by query or NULL if there is no key matching query in the
     128             :      // bplus tree.  Assumes join is a current local join.  The lifetime
     129             :      // of the returned pointer is the lesser of the lifetime of the
     130             :      // local join or the next insert / upsert / remove operation on the
     131             :      // bplus tree.  The bplus tree retains ownership of the returned
     132             :      // pair and the caller should not modify the key field.  The bplus
     133             :      // tree has no interest in query in return.  Fast O(lg N) worst
     134             :      // case.
     135             :      //
     136             :      // mybplus_query_const is a const-correct version.
     137             : 
     138             :      mypair_t const * mybplus_query_const( mybplus_t const * join, mykey_t const * query );
     139             :      mypair_t *       mybplus_query(       mybplus_t       * join, mykey_t const * query );
     140             : 
     141             :      // Operations
     142             : 
     143             :      // mybplus_insert inserts a key into the bplus tree.  Assumes join
     144             :      // is a current local join and key points in the caller's address
     145             :      // space to the key to insert.  The bplus tree has no interest in
     146             :      // key in return.
     147             :      //
     148             :      // On success, returns the location in the caller's address space
     149             :      // where key was inserted.  The lifetime of the returned pointer is
     150             :      // the lesser of the lifetime of the local join or there is an
     151             :      // insert / upsert / remove operation on the bplus tree.  The
     152             :      // caller should not modify the pair key field but is free to
     153             :      // modify all the other values.
     154             :      //
     155             :      // On failure, returns NULL.  Reasons for failure are the key was
     156             :      // already in the tree (locations of pairs might have changed),
     157             :      // there were not enough nodes (locations of pairs did not change)
     158             :      // or there were not enough leaves available to complete the insert
     159             :      // (locations of pairs did not change).
     160             :      //
     161             :      // mybplus_upsert is nearly equivalent to:
     162             :      //
     163             :      //   int insert;
     164             :      //   mypair_t *    pair = mybplus_query ( join, key ); insert = 0;
     165             :      //   if( !pair ) { pair = mybplus_insert( join, key ); insert = 1; }
     166             :      //   if( pair && _opt_insert ) *_opt_insert = insert;
     167             :      //
     168             :      // but potentially faster as it only traverses the bplus tree once.
     169             :      // The "nearly" qualifier is that, unlike the above snippet, the
     170             :      // upsert might change the location of keys even if key is already
     171             :      // in the bplus tree.  Fast O(lg N) worst case.
     172             : 
     173             :      mypair_t * mybplus_insert( mybplus_t * join, mykey_t const * key );
     174             :      mypair_t * mybplus_upsert( mybplus_t * join, mykey_t const * key, int * _opt_insert );
     175             : 
     176             :      // mybplus_remove_key removes a key from the bplus tree.  Assumes
     177             :      // join is a current local join and key points in the caller's
     178             :      // address space to the key to remove.  Returns 0 on success and -1
     179             :      // if the key was not found in the tree.  The bplus tree has no
     180             :      // interest in key in return.  Fast O(lg N) worst case.
     181             : 
     182             :      int mybplus_remove_key( mybplus_t * join, mykey_t const * key );
     183             : 
     184             :      // mybplus_remove removes the pair pointed to by pair from the
     185             :      // bplus tree.  Assumes join is a current local join and pair is a
     186             :      // pointer in the caller's local address space to a pair that is
     187             :      // currently in the bplus tree.  The pair is no longer in the bplus
     188             :      // tree on return.  Fast O(lg N) worst case.
     189             : 
     190             :      void mybplus_remove( mybplus_t * join, mypair_t * pair );
     191             : 
     192             :      // mybplus_flush removes all pairs from the bplus tree.  Assumes
     193             :      // join is a current local join.  There are no pairs in the bplus
     194             :      // tree on return.  Fast O( node_max + leaf_max ) worst case.
     195             : 
     196             :      void mybplus_flush( mybplus_t * join );
     197             : 
     198             :      // mybplus_verify validates the bplus tree pointed by join.
     199             :      // Returns 0 on success and -1 on failure (logs details).
     200             :      // O(node_max+leaf_max) worst case.
     201             : 
     202             :      int mybplus_verify( mybplus_t const * join );
     203             : 
     204             :      // Iteration
     205             : 
     206             :      // mybplus_iter_nul returns an iterator positioned at nul.  Fast
     207             :      // O(1) worst case.
     208             :      //
     209             :      // mybplus_iter_min returns an iterator positioned at the min pair
     210             :      // or nul if the bplus is empty.  Fast O(1) worst case.
     211             :      //
     212             :      // mybplus_iter_max returns an iterator positioned at the max pair
     213             :      // or nul if the bplus is empty.  Fast O(1) worst case.
     214             :      //
     215             :      // mybplus_iter_ge returns an iterator positioned at the first pair
     216             :      // greater than or equal to query or at nul if all keys are less
     217             :      // than query.  query==NULL is equivalent to "+inf".  Fast O(lg N)
     218             :      // worst case.
     219             :      //
     220             :      // mybplus_iter_gt returns an iterator positioned at the first pair
     221             :      // greater than query or at nul if all keys are less than or equal
     222             :      // to query.  query==NULL is equivalent to "+inf".  Fast O(lg N)
     223             :      // worst case.
     224             :      //
     225             :      // mybplus_iter_le returns an iterator positioned at the last pair
     226             :      // less than or equal to query or at nul if all keys are greater
     227             :      // than query.  query==NULL is equivalent to "-inf".  Fast O(lg N)
     228             :      // worst case.
     229             :      //
     230             :      // mybplus_iter_lt returns an iterator positioned at the last pair
     231             :      // less than to query or at nul if all keys are greater than or
     232             :      // equal to query.  query==NULL is equivalent to "-inf".  Fast
     233             :      // O(lg N) worst case.
     234             :      //
     235             :      // mybplus_iter_next returns an iterator positioned at the next
     236             :      // pair or nul if the iterator is currently positioned at last
     237             :      // pair.  Fast O(1) worst case.
     238             :      //
     239             :      // mybplus_iter_prev returns an iterator positioned at the previous
     240             :      // pair or nul if the iterator is currently positioned at first
     241             :      // pair.  Fast O(1) worst case.
     242             :      //
     243             :      // mybplus_iter_eq returns true if iter is positioned at the same
     244             :      // place fini is positioned.  Fast O(1) worst case.
     245             :      //
     246             :      // mybplus_iter_pair returns a pair associated with the current
     247             :      // iteration position.  mybplus_iter_pair_const is a const correct
     248             :      // version.  Fast O(1) worst case.
     249             :      //
     250             :      // Assumes join is a current local join and query points to a valid
     251             :      // key in the caller's local address space.  Retains no interest in
     252             :      // query on return.
     253             :      //
     254             :      // Example: iterate over all pairs in ascending order:
     255             :      //
     256             :      //   for( mybplus_iter_t iter = mybplus_iter_min( bplus );
     257             :      //        !mybplus_iter_eq_nul( bplus, iter );
     258             :      //        iter = mybplus_iter_next( bplus, iter ) ) {
     259             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     260             :      //     ... process pair here
     261             :      //     ... do not insert, upsert remove keys from bplus here
     262             :      //     ... do not modify key of pair here
     263             :      //   }
     264             :      //
     265             :      // Example: iterate over all pairs in descending order:
     266             :      //
     267             :      //   for( mybplus_iter_t iter = mybplus_iter_max( bplus );
     268             :      //        !mybplus_iter_eq_nul( bplus, iter );
     269             :      //        iter = mybplus_iter_prev( bplus, iter ) ) {
     270             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     271             :      //     ... process pair here
     272             :      //     ... do not insert, upsert remove keys from bplus here
     273             :      //     ... do not modify key of pair here
     274             :      //   }
     275             :      //
     276             :      // Example: iterate over all pairs with keys in [key0,key1) in
     277             :      // ascending order (assumes key1>=key0):
     278             :      //
     279             :      //   mybplus_iter_t iter = mybplus_iter_ge( bplus, key0 );
     280             :      //   mybplus_iter_t fini = mybplus_iter_ge( bplus, key1 ); // key1==NULL will iterate over all pairs with keys >= key0
     281             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     282             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     283             :      //
     284             :      //     ... process pair here
     285             :      //     ... do not insert, upsert or remove keys from bplus here
     286             :      //     ... do not modify key of pair here
     287             :      //
     288             :      //     iter = mybplus_iter_next( bplus, iter );
     289             :      //   }
     290             :      //
     291             :      // Example: iterate over all pairs with keys in [key0,key1] in
     292             :      // ascending order (assumes key1>=key0):
     293             :      //
     294             :      //   mybplus_iter_t iter = mybplus_iter_ge( bplus, key0 );
     295             :      //   mybplus_iter_t fini = mybplus_iter_gt( bplus, key1 ); // key1==NULL will iterate over all pairs with keys >= key0
     296             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     297             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     298             :      //
     299             :      //     ... process pair here
     300             :      //     ... do not insert, upsert or remove keys from bplus here
     301             :      //     ... do not modify key of pair here
     302             :      //
     303             :      //     iter = mybplus_iter_next( bplus, iter );
     304             :      //   }
     305             :      //
     306             :      // Example: iterate over all pairs with keys in (key0,key1) in
     307             :      // ascending order (assumes key1>=key0):
     308             :      //
     309             :      //   mybplus_iter_t iter = mybplus_iter_gt( bplus, key0 );
     310             :      //   mybplus_iter_t fini = mybplus_iter_ge( bplus, key1 ); // key1==NULL will iterate over all pairs with keys > key0
     311             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     312             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     313             :      //
     314             :      //     ... process pair here
     315             :      //     ... do not insert, upsert or remove keys from bplus here
     316             :      //     ... do not modify key of pair here
     317             :      //
     318             :      //     iter = mybplus_iter_next( bplus, iter );
     319             :      //   }
     320             :      //
     321             :      // Example: iterate over all pairs with keys in (key0,key1] in
     322             :      // ascending order (assumes key1>=key0):
     323             :      //
     324             :      //   mybplus_iter_t iter = mybplus_iter_gt( bplus, key0 );
     325             :      //   mybplus_iter_t fini = mybplus_iter_gt( bplus, key1 ); // key1==NULL will iterate over all pairs with keys > key0
     326             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     327             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     328             :      //
     329             :      //     ... process pair here
     330             :      //     ... do not insert, upsert or remove keys from bplus here
     331             :      //     ... do not modify key of pair here
     332             :      //
     333             :      //     iter = mybplus_iter_next( bplus, iter );
     334             :      //   }
     335             :      //
     336             :      // Example: iterate over all pairs with keys in [key0,key1) in
     337             :      // descending order (assumes key1>=key0):
     338             :      //
     339             :      //   mybplus_iter_t iter = mybplus_iter_lt( bplus, key1 );
     340             :      //   mybplus_iter_t fini = mybplus_iter_lt( bplus, key0 ); // key0==NULL will iterate over all pairs with keys < key1
     341             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     342             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     343             :      //
     344             :      //     ... process pair here
     345             :      //     ... do not insert, upsert or remove keys from bplus here
     346             :      //     ... do not modify key of pair here
     347             :      //
     348             :      //     iter = mybplus_iter_prev( bplus, iter );
     349             :      //   }
     350             :      //
     351             :      // Example: iterate over all pairs with keys in [key0,key1] in
     352             :      // descending order (assumes key1>=key0):
     353             :      //
     354             :      //   mybplus_iter_t iter = mybplus_iter_le( bplus, key1 );
     355             :      //   mybplus_iter_t fini = mybplus_iter_lt( bplus, key0 ); // key0==NULL will iterate over all pairs with keys <= key1
     356             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     357             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     358             :      //
     359             :      //     ... process pair here
     360             :      //     ... do not insert, upsert or remove keys from bplus here
     361             :      //     ... do not modify key of pair here
     362             :      //
     363             :      //     iter = mybplus_iter_prev( bplus, iter );
     364             :      //   }
     365             :      //
     366             :      // Example: iterate over all pairs with keys in (key0,key1) in
     367             :      // descending order (assumes key1>=key0):
     368             :      //
     369             :      //   mybplus_iter_t iter = mybplus_iter_lt( bplus, key1 );
     370             :      //   mybplus_iter_t fini = mybplus_iter_le( bplus, key0 ); // key0==NULL will iterate over all pairs with keys < key1
     371             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     372             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     373             :      //
     374             :      //     ... process pair here
     375             :      //     ... do not insert, upsert or remove keys from bplus here
     376             :      //     ... do not modify key of pair here
     377             :      //
     378             :      //     iter = mybplus_iter_prev( bplus, iter );
     379             :      //   }
     380             :      //
     381             :      // Example: iterate over all pairs with keys in (key0,key1] in
     382             :      // descending order (assumes key1>=key0):
     383             :      //
     384             :      //   mybplus_iter_t iter = mybplus_iter_le( bplus, key1 );
     385             :      //   mybplus_iter_t fini = mybplus_iter_le( bplus, key0 ); // key0==NULL will iterate over all pairs with keys < key1
     386             :      //   while( !mybplus_iter_eq( bplus, iter, fini ) ) {
     387             :      //     mypair_t * pair = mybplus_iter_pair( bplus, iter );
     388             :      //
     389             :      //     ... process pair here
     390             :      //     ... do not insert, upsert or remove keys from bplus here
     391             :      //     ... do not modify key of pair here
     392             :      //
     393             :      //     iter = mybplus_iter_prev( bplus, iter );
     394             :      //   }
     395             : 
     396             :      mybplus_iter_t mybplus_iter_nul( mybplus_t const * join );
     397             :      mybplus_iter_t mybplus_iter_min( mybplus_t const * join );
     398             :      mybplus_iter_t mybplus_iter_max( mybplus_t const * join );
     399             : 
     400             :      mybplus_iter_t mybplus_iter_ge( mybplus_t const * join, mykey_t const * query );
     401             :      mybplus_iter_t mybplus_iter_gt( mybplus_t const * join, mykey_t const * query );
     402             :      mybplus_iter_t mybplus_iter_le( mybplus_t const * join, mykey_t const * query );
     403             :      mybplus_iter_t mybplus_iter_lt( mybplus_t const * join, mykey_t const * query );
     404             : 
     405             :      int mybplus_iter_eq    ( mybplus_t const * join, mybplus_iter_t i0, mybplus_iter_t i1 );
     406             :      int mybplus_iter_eq_nul( mybplus_t const * join, mybplus_iter_t iter );
     407             : 
     408             :      mybplus_iter_t mybplus_iter_next( mybplus_t const * join, mybplus_iter_t iter );
     409             :      mybplus_iter_t mybplus_iter_prev( mybplus_t const * join, mybplus_iter_t iter );
     410             : 
     411             :      mypair_t const * mybplus_iter_pair_const( mybplus_t const * join, mybplus_iter_t iter );
     412             :      mypair_t       * mybplus_iter_pair      ( mybplus_t *       join, mybplus_iter_t iter );
     413             : 
     414             :    You can do this as often as you like in a compilation unit to get
     415             :    different types of bplus trees.  Variants exist for making header
     416             :    protoypes only and/or implementations if doing a library with
     417             :    multiple compilation units.  Further, options exist to use different
     418             :    hashing functions, comparison functions, etc as detailed below. */
     419             : 
     420             : /* BPLUS_NAME gives the API prefix. */
     421             : 
     422             : #ifndef BPLUS_NAME
     423             : #error "Define BPLUS_NAME"
     424             : #endif
     425             : 
     426             : /* BPLUS_KEY_T gives the key type.  Should be a plain-old-data type with
     427             :    a total order. */
     428             : 
     429             : #ifndef BPLUS_KEY_T
     430             : #error "Define BPLUS_KEY_T"
     431             : #endif
     432             : 
     433             : /* BPLUS_PAIR_T gives the pair type.  Should be a structure of the form:
     434             : 
     435             :      typedef struct BPLUS_PAIR {
     436             :        BPLUS_KEY_T key; // Can be arbitrarily placed in structure, should not be modified by the user
     437             :        ... arbitrary user fields
     438             :      } BPLUS_PAIR_T;
     439             : 
     440             :   (Or the appropriate field name given BPLUS_PAIR_KEY below.) */
     441             : 
     442             : #ifndef BPLUS_PAIR_T
     443             : #error "Define BPLUS_PAIR_T"
     444             : #endif
     445             : 
     446             : /* BPLUS_PAIR_KEY gives the name of the key field in the BPLUS_PAIR_KEY.
     447             :    Defaults to key. */
     448             : 
     449             : #ifndef BPLUS_PAIR_KEY
     450             : #define BPLUS_PAIR_KEY key
     451             : #endif
     452             : 
     453             : /* BPLUS_KEY_CMP compares the keys pointed to by a and b and returns
     454             :    {<0,0,>0} if the a is {less than,equal to,greater than}.  a and b
     455             :    will be valid pointers to key .  Defaults to memcmp based. */
     456             : 
     457             : #ifndef BPLUS_KEY_CMP
     458             : #define BPLUS_KEY_CMP(a,b) memcmp( (a), (b), sizeof(*(a)) )
     459             : #endif
     460             : 
     461             : /* BPLUS_TREE_MAX is the maximum number of children a non-leaf node can
     462             :    have.  Must be even, >=4 and <<< ULONG_MAX.  Defaults to 128. */
     463             : 
     464             : #ifndef BPLUS_TREE_MAX
     465             : #define BPLUS_TREE_MAX 128
     466             : #endif
     467             : 
     468             : /* BPLUS_PAIR_MAX is the maximum number of children a leaf node can
     469             :    have.  Must be even, >=4 and <<< ULONG_MAX.  Defaults to 128. */
     470             : 
     471             : #ifndef BPLUS_PAIR_MAX
     472             : #define BPLUS_PAIR_MAX 128
     473             : #endif
     474             : 
     475             : /* BPLUS_ALIGN gives the default alignment of the BPLUS region.  Should
     476             :    be a positive integer power of 2.  Defaults to 128. */
     477             : 
     478             : #ifndef BPLUS_ALIGN
     479          12 : #define BPLUS_ALIGN 128
     480             : #endif
     481             : 
     482             : /* BPLUS_NODE_ALIGN gives the default alignment of an interior node.
     483             :    Should be a positive integer power of 2 of at most BPLUS_ALIGN.
     484             :    Defaults to 128. */
     485             : 
     486             : #ifndef BPLUS_NODE_ALIGN
     487   589152144 : #define BPLUS_NODE_ALIGN 128
     488             : #endif
     489             : 
     490             : /* BPLUS_LEAF_ALIGN gives the default alignment of a leaf node.  Should
     491             :    be a positive integer power of 2 of at most BPLUS_ALIGN.  Defaults to
     492             :    128. */
     493             : 
     494             : #ifndef BPLUS_LEAF_ALIGN
     495   589152144 : #define BPLUS_LEAF_ALIGN 128
     496             : #endif
     497             : 
     498             : /* BPLUS_MAGIC is the structure magic number to use to aid in persistent
     499             :    and or IPC usage. */
     500             : 
     501             : #ifndef BPLUS_MAGIC
     502           3 : #define BPLUS_MAGIC (0xfdb91c53a61c0000UL) /* FD BPLUS MAGIC 0000 */
     503             : #endif
     504             : 
     505             : /* BPLUS_IMPL_STYLE indicates what this generator should output:
     506             :      0 - static implementation
     507             :      1 - library header
     508             :      2 - library implementation */
     509             : 
     510             : #ifndef BPLUS_IMPL_STYLE
     511             : #define BPLUS_IMPL_STLYE 0
     512             : #endif
     513             : 
     514             : /**********************************************************************/
     515             : 
     516 21022682838 : #define BPLUS_(name)FD_EXPAND_THEN_CONCAT3(BPLUS_NAME,_,name)
     517             : 
     518             : #if BPLUS_IMPL_STYLE==0
     519             : #define BPLUS_STATIC FD_FN_UNUSED static
     520             : #else
     521             : #define BPLUS_STATIC
     522             : #endif
     523             : 
     524             : #if BPLUS_IMPL_STYLE==0 || BPLUS_IMPL_STYLE==1
     525             : 
     526             : /* Header *************************************************************/
     527             : 
     528             : #include "../log/fd_log.h"
     529             : 
     530             : struct BPLUS_(private);
     531             : typedef struct BPLUS_(private) BPLUS_(t);
     532             : 
     533             : struct BPLUS_(private_iter);
     534             : typedef struct BPLUS_(private_iter) BPLUS_(iter_t);
     535             : 
     536             : /* Internal use only */
     537             : 
     538             : /* A bplus_private_node_t is used for finding leaves that might contain
     539             :    an element fast. */
     540             : 
     541             : struct __attribute__((aligned(BPLUS_NODE_ALIGN))) BPLUS_(private_node) {
     542             : 
     543             :   /* This point is BPLUS_NODE_ALIGN aligned */
     544             : 
     545             :   ulong       tree_cnt;                     /* if acquired, in [0,BPLUS_TREE_MAX],  else ignored */
     546             :   ulong       tree_off[ BPLUS_TREE_MAX   ]; /* if acquired, indexed [0,tree_cnt),   else
     547             :                                                tree_off[0]==node pool next offset (0 if last node in pool) */
     548             :   BPLUS_KEY_T pivot   [ BPLUS_TREE_MAX-1 ]; /* if acquired, indexed [0,tree_cnt-1), else ignored */
     549             : 
     550             :   /* tree i handles keys in [ pivot[i-1], pivot[i] ), pivot[-1] /
     551             :      pivot[tree_cnt-1] are implied to be the previous / next pivot in an
     552             :      in-order traversal of the bplus tree node pivots (or -/+inf if
     553             :      leftmost/rightmost). */
     554             : };
     555             : 
     556             : typedef struct BPLUS_(private_node) BPLUS_(private_node_t);
     557             : 
     558             : /* A bplus_private_leaf_t holds up to pair_cnt elements of pairs in the
     559             :    tree in a sorted order. */
     560             : 
     561             : struct __attribute__((aligned(BPLUS_LEAF_ALIGN))) BPLUS_(private_leaf) {
     562             : 
     563             :   /* This point is BPLUS_LEAF_ALIGN aligned */
     564             : 
     565             :   ulong        pair_cnt;               /* if acquired, in [0,BPLUS_PAIR_MAX],                                    else ignored */
     566             :   ulong        prev_off;               /* if acquired, prev leaf offset (or 0 if first leaf),                    else ignored */
     567             :   ulong        next_off;               /* if acquired, next leaf offset (or 0 if last  leaf),
     568             :                                           else leaf pool next offset (0 if last node in pool) */
     569             :   BPLUS_PAIR_T pair[ BPLUS_PAIR_MAX ]; /* if acquired, indexed [0,pair_cnt), unique keys and in ascending order, else ignored */
     570             : };
     571             : 
     572             : typedef struct BPLUS_(private_leaf) BPLUS_(private_leaf_t);
     573             : 
     574             : /* A bplus_private_t is a continguous region of memory that holds a
     575             :    bplus tree.  Important invariants:
     576             : 
     577             :    - Empty trees have no root.
     578             :    - If root is a leaf, it has [1,pair_max] pairs.
     579             :    - If root is a node, it has [2,tree_max] trees.
     580             :    - Non-root nodes  have [tree_min,tree_max] trees.
     581             :    - Non-root leaves have [pair_min,pair_max] pairs.
     582             :    - Children of a node are not a mix of nodes and leaves. */
     583             : 
     584             : struct __attribute__((aligned(BPLUS_ALIGN))) BPLUS_(private) {
     585             : 
     586             :   /* This point is aligned BPLUS_ALIGN */
     587             : 
     588             :   ulong magic;                              /* ==BPLUS_MAGIC */
     589             :   ulong node_max;      ulong leaf_max;      /* maximum number of node/leaf in the store */
     590             :   ulong node_lo;       ulong leaf_lo;       /* offset from the first byte of bplus header to the node/leaf storage */
     591             :   ulong node_pool_off; ulong leaf_pool_off; /* first node/leaf in node/leaf pool, 0 if no node/leaf in pool */
     592             :   ulong root_off;                           /* offset of node/leaf to tree root (or 0 if empty) */
     593             :   ulong leaf_min_off;                       /* offset of leaf with minimum pair (or 0 if empty) */
     594             :   ulong leaf_max_off;                       /* offset of leaf with maximum pair (or 0 if empty) */
     595             : 
     596             :   /* padding to BPLUS_NODE_ALIGN here */
     597             :   /* node_lo points here, node_max elements, indexed [0,node_max) */
     598             :   /* padding to BPLUS_LEAF_ALIGN here */
     599             :   /* leaf_lo points here, leaf_max elements, indexed [0,leaf_max) */
     600             :   /* padding to BPLUS_ALIGN here */
     601             : 
     602             : };
     603             : 
     604             : typedef struct BPLUS_(private) BPLUS_(private_t);
     605             : 
     606             : struct BPLUS_(private_iter) {
     607             :   ulong leaf_off; /* offset to current leaf */
     608             :   ulong pair_idx; /* current pair in current leaf */
     609             : };
     610             : 
     611             : FD_PROTOTYPES_BEGIN
     612             : 
     613             : /* bplus_private_{pair,tree}_{min,max} return the corresponding
     614             :    configuration values for this bplus implementation. */
     615             : 
     616    13161015 : FD_FN_CONST static inline ulong BPLUS_(private_pair_min)( void ) { return (ulong)(BPLUS_PAIR_MAX/2); } /* exact */
     617    13161021 : FD_FN_CONST static inline ulong BPLUS_(private_pair_max)( void ) { return (ulong) BPLUS_PAIR_MAX;    }
     618             : 
     619    10361790 : FD_FN_CONST static inline ulong BPLUS_(private_tree_min)( void ) { return (ulong)(BPLUS_TREE_MAX/2); } /* exact */
     620    10361769 : FD_FN_CONST static inline ulong BPLUS_(private_tree_max)( void ) { return (ulong) BPLUS_TREE_MAX;    }
     621             : 
     622             : /* bplus_private_{node,leaf}_max_max return a value for {node,leaf}_max
     623             :    such that the {node,leaf} storage of the bplus tree will require at
     624             :    most 2^62 bytes. */
     625             : 
     626             : FD_FN_CONST static inline ulong
     627     1873272 : BPLUS_(private_node_max_max)( void ) {
     628     1873272 :   return ((1UL<<62)-BPLUS_NODE_ALIGN+1UL) / sizeof( BPLUS_(private_node_t));
     629     1873272 : }
     630             : 
     631             : FD_FN_CONST static inline ulong
     632     1873272 : BPLUS_(private_leaf_max_max)( void ) {
     633     1873272 :   return ((1UL<<62)-BPLUS_LEAF_ALIGN+1UL) / sizeof( BPLUS_(private_leaf_t));
     634     1873272 : }
     635             : 
     636             : /* bplus_private_key_cmp gives BPLUS_KEY_CMP the exact function
     637             :    signature used by the below implementations. */
     638             : 
     639             : FD_FN_PURE static inline int
     640             : BPLUS_(private_key_cmp)( BPLUS_KEY_T const * a,
     641  9903640383 :                          BPLUS_KEY_T const * b ) {
     642  9903640383 :   return BPLUS_KEY_CMP(a,b);
     643  9903640383 : }
     644             : 
     645             : /* bplus_private_is_leaf returns 1 if the root of the tree at bplus
     646             :    global offset is a leaf or 0 if it is a node.  leaf_lo is the bplus
     647             :    global offset of the leaf preallocated storage.  Assumes tree_off and
     648             :    leaf_lo are valid. */
     649             : 
     650  4652879142 : FD_FN_CONST static inline int BPLUS_(private_is_leaf)( ulong tree_off, ulong leaf_lo ) { return tree_off>=leaf_lo; }
     651             : 
     652             : /* bplus_private returns location of the bplus private metadata in the
     653             :    caller's address space given a valid local join.  Lifetime of the
     654             :    returned pointer is the lifetime of the join.  bplus_private_const is
     655             :    a const correct version. */
     656             : 
     657             : FD_FN_CONST static inline BPLUS_(private_t) *
     658    16917414 : BPLUS_(private)( BPLUS_(t) * join ) {
     659    16917414 :   return (BPLUS_(private_t) *)join;
     660    16917414 : }
     661             : 
     662             : FD_FN_CONST static inline BPLUS_(private_t) const *
     663    89992482 : BPLUS_(private_const)( BPLUS_(t) const * join ) {
     664    89992482 :   return (BPLUS_(private_t) const *)join;
     665    89992482 : }
     666             : 
     667             : /* bplus_private_{node,leaf} return the pointer in the caller's local
     668             :    address space of the {node,leaf} located at bplus global
     669             :    {node,leaf}_off.  The lifetime of the returned pointer is the
     670             :    lifetime of the local join.  Assumes bplus and node_off are valid. */
     671             : 
     672             : FD_FN_CONST static inline BPLUS_(private_node_t) *
     673             : BPLUS_(private_node)( BPLUS_(private_t) * bplus,
     674    71810601 :                       ulong               node_off ) {
     675    71810601 :   return (BPLUS_(private_node_t) *)((ulong)bplus + node_off);
     676    71810601 : }
     677             : 
     678             : FD_FN_CONST static inline BPLUS_(private_leaf_t) *
     679             : BPLUS_(private_leaf)( BPLUS_(private_t) * bplus,
     680    23130888 :                       ulong               leaf_off ) {
     681    23130888 :   return (BPLUS_(private_leaf_t) *)((ulong)bplus + leaf_off);
     682    23130888 : }
     683             : 
     684             : FD_FN_CONST static inline BPLUS_(private_node_t) const *
     685             : BPLUS_(private_node_const)( BPLUS_(private_t) const * bplus,
     686  4274319606 :                             ulong                     node_off ) {
     687  4274319606 :   return (BPLUS_(private_node_t) const *)((ulong)bplus + node_off);
     688  4274319606 : }
     689             : 
     690             : FD_FN_CONST static inline BPLUS_(private_leaf_t) const *
     691             : BPLUS_(private_leaf_const)( BPLUS_(private_t) const * bplus,
     692  7092034104 :                             ulong                     leaf_off ) {
     693  7092034104 :   return (BPLUS_(private_leaf_t) const *)((ulong)bplus + leaf_off);
     694  7092034104 : }
     695             : 
     696             : /* bplus_private_off returns the bplus global offset for the given
     697             :    address in the caller's address space.  Assumes bplus is valid and
     698             :    addr is non-NULL and into the bplus memory region. */
     699             : 
     700             : FD_FN_CONST static inline ulong
     701             : BPLUS_(private_off)( BPLUS_(private_t) const * bplus,
     702     4347300 :                      void const *              addr ) {
     703     4347300 :   return (ulong)addr - (ulong)bplus;
     704     4347300 : }
     705             : 
     706             : /* bplus_private_node_acquire acquires a node from the bplus's node pool
     707             :    and returns a pointer to it in the caller's address space.  Assumes
     708             :    bplus is valid.  Returns NULL if bplus node pool is empty.
     709             : 
     710             :    bplus_private_node_release releases a node to the bplus's node pool.
     711             :    Assumes bplus is valid, node is valid and node is not currently in
     712             :    the pool.
     713             : 
     714             :    Similarly for bplus_private_leaf_{acquire,release}. */
     715             : 
     716             : static inline BPLUS_(private_node_t) *
     717      232035 : BPLUS_(private_node_acquire)( BPLUS_(private_t) * bplus ) {
     718      232035 :   ulong node_off = bplus->node_pool_off;
     719      232035 :   if( FD_UNLIKELY( !node_off ) ) return NULL;
     720      232035 :   BPLUS_(private_node_t *) node = BPLUS_(private_node)( bplus, node_off );
     721      232035 :   bplus->node_pool_off = node->tree_off[0];
     722      232035 :   return node;
     723      232035 : }
     724             : 
     725             : static inline void
     726             : BPLUS_(private_node_release)( BPLUS_(private_t)      * bplus,
     727      236961 :                               BPLUS_(private_node_t) * node ) {
     728      236961 :   node->tree_off[0]    = bplus->node_pool_off;
     729      236961 :   bplus->node_pool_off = BPLUS_(private_off)( bplus, node );
     730      236961 : }
     731             : 
     732             : static inline BPLUS_(private_leaf_t) *
     733      967224 : BPLUS_(private_leaf_acquire)( BPLUS_(private_t) * bplus ) {
     734      967224 :   ulong leaf_off = bplus->leaf_pool_off;
     735      967224 :   if( FD_UNLIKELY( !leaf_off ) ) return NULL;
     736      967221 :   BPLUS_(private_leaf_t *) leaf = BPLUS_(private_leaf)( bplus, leaf_off );
     737      967221 :   bplus->leaf_pool_off = leaf->next_off;
     738      967221 :   return leaf;
     739      967224 : }
     740             : 
     741             : static inline void
     742             : BPLUS_(private_leaf_release)( BPLUS_(private_t)      * bplus,
     743      976146 :                               BPLUS_(private_leaf_t) * leaf ) {
     744      976146 :   leaf->next_off       = bplus->leaf_pool_off;
     745      976146 :   bplus->leaf_pool_off = BPLUS_(private_off)( bplus, leaf );
     746      976146 : }
     747             : 
     748             : /* bplus_private_insert inserts or upserts a key into a bplus tree.
     749             :    Assumes join is a current local join and key points to a valid key in
     750             :    the caller's address space and upsert is in [0,1].
     751             : 
     752             :    upsert 0: key will inserted into bplus.  On success, returns the pair
     753             :    where key was inserted and, on return, *_insert will be 1.  Caller
     754             :    can update all fields in the pair except the key.  Lifetime of the
     755             :    returned pointer is until the next insert / upsert / remove.  Returns
     756             :    NULL if there was no room in the bplus tree or if key was already in
     757             :    the bplus tree (might have moved pairs around in bplus tree on
     758             :    failure) and _insert will be untouched.
     759             : 
     760             :    upsert 1: key will inserted or updated into bplus.  If key is already
     761             :    present in the bplus tree, returns the location in the caller's
     762             :    address space of the pair with the matching key and, on return,
     763             :    *_insert will be 0.  If not, inserts the key and requires the
     764             :    location in the caller's address space where pair was inserted and,
     765             :    on return, *_insert will be 1.  In both cases, the lifetime of the
     766             :    returned pointer is until the next insert / upsert / remove.  Returns
     767             :    NULL if there was no room in the bplus tree to insert (might have
     768             :    moved pairs around in bplus tree on failure) and _insert will be
     769             :    untouched.
     770             : 
     771             :    The bplus retains no interest in query on return. */
     772             : 
     773             : BPLUS_STATIC BPLUS_PAIR_T *
     774             : BPLUS_(private_insert)( BPLUS_(t)         * join,
     775             :                         BPLUS_KEY_T const * key,
     776             :                         int                 upsert,
     777             :                         int *               _insert );
     778             : 
     779             : /* bplus_private_iter returns the iterator corresponding to query and
     780             :    op.  Assumes join is a current local join, query points to a valid
     781             :    key in the caller's address space or is NULL and op is in [0,3].
     782             :    Returns an iter positioned at:
     783             : 
     784             :      op     | position
     785             :      -------+-------------------------------------------------------------------------------------------------------------------
     786             :      0 (GE) | the first pair with a key greater than or equal to query (or nul if all have keys less than query)
     787             :      1 (GT) | the first pair with a key greater than             query (or nul if all have keys less than or equal to query)
     788             :      2 (LE) | the last  pair with a key less    than or equal to query (or nul if all have keys greater than query)
     789             :      3 (LT) | the last  pair with a key less    than             query (or nul if all have keys greater than or equal to query)
     790             : 
     791             :    If query is NULL, iteration will be positioned as though:
     792             : 
     793             :      op     | query
     794             :      -------+-------
     795             :      0 (GE) | +inf
     796             :      1 (GT) | +inf
     797             :      2 (LE) | -inf
     798             :      3 (LT) | -inf
     799             : 
     800             :    The bplus retains no interest in query on return. */
     801             : 
     802             : FD_FN_PURE BPLUS_STATIC BPLUS_(iter_t)
     803             : BPLUS_(private_iter)( BPLUS_(t)   const * join,
     804             :                       BPLUS_KEY_T const * query,
     805             :                       int                 op );
     806             : 
     807             : FD_PROTOTYPES_END
     808             : 
     809             : /* End internal use only */
     810             : 
     811             : FD_PROTOTYPES_BEGIN
     812             : 
     813             : /* Constructors */
     814             : 
     815             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(leaf_max_est)( ulong ele_max_est );
     816             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(node_max_est)( ulong ele_max_est );
     817             : 
     818             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(align)    ( void );
     819             : FD_FN_CONST BPLUS_STATIC ulong BPLUS_(footprint)( ulong node_max, ulong leaf_max );
     820             : 
     821             : BPLUS_STATIC void      * BPLUS_(new)   ( void *      shmem, ulong node_max, ulong leaf_max );
     822             : BPLUS_STATIC BPLUS_(t) * BPLUS_(join)  ( void *      shbplus );
     823             : BPLUS_STATIC void      * BPLUS_(leave) ( BPLUS_(t) * join );
     824             : BPLUS_STATIC void      * BPLUS_(delete)( void *      shbplus );
     825             : 
     826             : /* Accessors */
     827             : 
     828     1873254 : FD_FN_PURE static inline ulong BPLUS_(node_max)( BPLUS_(t) const * join ) { return BPLUS_(private_const)( join )->node_max; }
     829     1873254 : FD_FN_PURE static inline ulong BPLUS_(leaf_max)( BPLUS_(t) const * join ) { return BPLUS_(private_const)( join )->leaf_max; }
     830             : 
     831    16869642 : FD_FN_PURE static inline int BPLUS_(is_empty)( BPLUS_(t) const * join ) { return !BPLUS_(private_const)( join )->root_off; }
     832             : 
     833             : FD_FN_PURE static inline BPLUS_PAIR_T const *
     834     7497702 : BPLUS_(min_const)( BPLUS_(t) const * join ) {
     835     7497702 :   BPLUS_(t) const * bplus = BPLUS_(private_const)( join );
     836     7497702 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, bplus->leaf_min_off );
     837     7497702 :   return &leaf->pair[0];
     838     7497702 : }
     839             : 
     840             : FD_FN_PURE static inline BPLUS_PAIR_T const *
     841     7497702 : BPLUS_(max_const)( BPLUS_(t) const * join ) {
     842     7497702 :   BPLUS_(t) const * bplus = BPLUS_(private_const)( join );
     843     7497702 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, bplus->leaf_max_off );
     844     7497702 :   return &leaf->pair[ leaf->pair_cnt-1UL ];
     845     7497702 : }
     846             : 
     847     3748851 : FD_FN_PURE static inline BPLUS_PAIR_T * BPLUS_(min)( BPLUS_(t) * join ) { return (BPLUS_PAIR_T *)BPLUS_(min_const)( join ); }
     848     3748851 : FD_FN_PURE static inline BPLUS_PAIR_T * BPLUS_(max)( BPLUS_(t) * join ) { return (BPLUS_PAIR_T *)BPLUS_(max_const)( join ); }
     849             : 
     850             : FD_FN_PURE BPLUS_STATIC BPLUS_PAIR_T const * BPLUS_(query_const)( BPLUS_(t) const * join, BPLUS_KEY_T const * query );
     851             : 
     852             : FD_FN_PURE static inline BPLUS_PAIR_T *
     853             : BPLUS_(query)( BPLUS_(t)         * join,
     854    13124622 :                BPLUS_KEY_T const * query ) {
     855    13124622 :   return (BPLUS_PAIR_T *)BPLUS_(query_const)( join, query );
     856    13124622 : }
     857             : 
     858             : /* Operations */
     859             : 
     860             : static inline BPLUS_PAIR_T *
     861             : BPLUS_(insert)( BPLUS_(t) *         join,
     862     3773271 :                 BPLUS_KEY_T const * key ) {
     863     3773271 :   int dummy;
     864     3773271 :   return BPLUS_(private_insert)( join, key, 0, &dummy );
     865     3773271 : }
     866             : 
     867             : static inline BPLUS_PAIR_T *
     868             : BPLUS_(upsert)( BPLUS_(t) *         join,
     869             :                 BPLUS_KEY_T const * key,
     870     3751845 :                 int *               _opt_insert ) {
     871     3751845 :   int dummy;
     872     3751845 :   if( !_opt_insert ) _opt_insert = &dummy; /* compile time */
     873     3751845 :   return BPLUS_(private_insert)( join, key, 1, _opt_insert );
     874     3751845 : }
     875             : 
     876             : BPLUS_STATIC int BPLUS_(remove_key)( BPLUS_(t) * join, BPLUS_KEY_T const * key );
     877             : 
     878     1873158 : static inline void BPLUS_(remove)( BPLUS_(t) * join, BPLUS_PAIR_T * pair ) { BPLUS_(remove_key)( join, &pair->BPLUS_PAIR_KEY ); }
     879             : 
     880             : BPLUS_STATIC void BPLUS_(flush)( BPLUS_(t) * join );
     881             : 
     882             : BPLUS_STATIC int BPLUS_(verify)( BPLUS_(t) const * join );
     883             : 
     884             : /* Iteration */
     885             : /* FIXME: FD_FN_CONST for nul/eq/eq_nul/pair/pair_const?  FD_FN_PURE for
     886             :    min/max/prev/next? */
     887             : 
     888             : static inline BPLUS_(iter_t)
     889     1875756 : BPLUS_(iter_nul)( BPLUS_(t) const * join ) {
     890     1875756 :   (void)join;
     891     1875756 :   BPLUS_(iter_t) iter;
     892     1875756 :   iter.leaf_off = 0UL;
     893     1875756 :   iter.pair_idx = 0UL;
     894     1875756 :   return iter;
     895     1875756 : }
     896             : 
     897             : static inline BPLUS_(iter_t)
     898     1875756 : BPLUS_(iter_min)( BPLUS_(t) const * join ) {
     899     1875756 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     900     1875756 :   ulong leaf_off = bplus->leaf_min_off;
     901     1875756 :   BPLUS_(iter_t) iter;
     902     1875756 :   iter.leaf_off = leaf_off;
     903     1875756 :   iter.pair_idx = 0UL;
     904     1875756 :   return iter;
     905     1875756 : }
     906             : 
     907             : static inline BPLUS_(iter_t)
     908     1875756 : BPLUS_(iter_max)( BPLUS_(t) const * join ) {
     909     1875756 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     910     1875756 :   ulong leaf_off = bplus->leaf_max_off;
     911     1875756 :   BPLUS_(iter_t) iter;
     912     1875756 :   iter.leaf_off = leaf_off;
     913     1875756 :   iter.pair_idx = (FD_UNLIKELY( !leaf_off ) ? 1UL : BPLUS_(private_leaf_const)( bplus, leaf_off )->pair_cnt) - 1UL;
     914     1875756 :   return iter;
     915     1875756 : }
     916             : 
     917             : FD_FN_PURE static inline BPLUS_(iter_t)
     918             : BPLUS_(iter_ge)( BPLUS_(t)   const * join,
     919     3749115 :                  BPLUS_KEY_T const * query ) {
     920     3749115 :   return BPLUS_(private_iter)( join, query, 0 );
     921     3749115 : }
     922             : 
     923             : FD_FN_PURE static inline BPLUS_(iter_t)
     924             : BPLUS_(iter_gt)( BPLUS_(t)   const * join,
     925     3749115 :                  BPLUS_KEY_T const * query ) {
     926     3749115 :   return BPLUS_(private_iter)( join, query, 1 );
     927     3749115 : }
     928             : 
     929             : FD_FN_PURE static inline BPLUS_(iter_t)
     930             : BPLUS_(iter_le)( BPLUS_(t)   const * join,
     931     3749115 :                  BPLUS_KEY_T const * query ) {
     932     3749115 :   return BPLUS_(private_iter)( join, query, 2 );
     933     3749115 : }
     934             : 
     935             : FD_FN_PURE static inline BPLUS_(iter_t)
     936             : BPLUS_(iter_lt)( BPLUS_(t)   const * join,
     937     3749115 :                  BPLUS_KEY_T const * query ) {
     938     3749115 :   return BPLUS_(private_iter)( join, query, 3 );
     939     3749115 : }
     940             : 
     941             : static inline int
     942             : BPLUS_(iter_eq)( BPLUS_(t) const * join,
     943             :                  BPLUS_(iter_t)    iter,
     944    11251554 :                  BPLUS_(iter_t)    fini ) {
     945    11251554 :   (void)join;
     946    11251554 :   return (iter.leaf_off==fini.leaf_off) & (iter.pair_idx==fini.pair_idx);
     947    11251554 : }
     948             : 
     949             : static inline int
     950             : BPLUS_(iter_eq_nul)( BPLUS_(t) const * join,
     951    14998770 :                      BPLUS_(iter_t)    iter ) {
     952    14998770 :   (void)join;
     953    14998770 :   return !iter.leaf_off;
     954    14998770 : }
     955             : 
     956             : __attribute__((warn_unused_result))
     957             : static inline BPLUS_(iter_t)
     958             : BPLUS_(iter_next)( BPLUS_(t) const * join,
     959     3751176 :                    BPLUS_(iter_t)    iter ) {
     960     3751176 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     961             : 
     962     3751176 :   ulong leaf_off = iter.leaf_off;
     963     3751176 :   ulong pair_idx = iter.pair_idx;
     964             : 
     965     3751176 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, leaf_off );
     966             : 
     967     3751176 :   pair_idx++;
     968     3751176 :   if( FD_UNLIKELY( pair_idx>=leaf->pair_cnt ) ) { /* optimize for high radix */
     969     3751176 :     leaf_off = leaf->next_off;
     970     3751176 :     pair_idx = 0UL;
     971     3751176 :   }
     972             : 
     973     3751176 :   iter.leaf_off = leaf_off;
     974     3751176 :   iter.pair_idx = pair_idx;
     975     3751176 :   return iter;
     976     3751176 : }
     977             : 
     978             : __attribute__((warn_unused_result))
     979             : static inline BPLUS_(iter_t)
     980             : BPLUS_(iter_prev)( BPLUS_(t) const * join,
     981     1875681 :                    BPLUS_(iter_t)    iter ) {
     982     1875681 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
     983             : 
     984     1875681 :   ulong leaf_off = iter.leaf_off;
     985     1875681 :   ulong pair_idx = iter.pair_idx;
     986             : 
     987     1875681 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, leaf_off );
     988             : 
     989     1875681 :   if( FD_UNLIKELY( !pair_idx ) ) { /* optimize for high radix */
     990     1875681 :     leaf_off = leaf->prev_off;
     991     1875681 :     pair_idx = FD_UNLIKELY( !leaf_off ) ? 1UL : BPLUS_(private_leaf_const)( bplus, leaf_off )->pair_cnt;
     992     1875681 :   }
     993     1875681 :   pair_idx--;
     994             : 
     995     1875681 :   iter.leaf_off = leaf_off;
     996     1875681 :   iter.pair_idx = pair_idx;
     997     1875681 :   return iter;
     998     1875681 : }
     999             : 
    1000             : static inline BPLUS_PAIR_T const *
    1001             : BPLUS_(iter_pair_const)( BPLUS_(t) const * join,
    1002    11243061 :                          BPLUS_(iter_t)    iter ) {
    1003    11243061 :   return BPLUS_(private_leaf_const)( BPLUS_(private_const)( join ), iter.leaf_off )->pair + iter.pair_idx;
    1004    11243061 : }
    1005             : 
    1006             : static inline BPLUS_PAIR_T *
    1007             : BPLUS_(iter_pair)( BPLUS_(t) *    join,
    1008     3751362 :                    BPLUS_(iter_t) iter ) {
    1009     3751362 :   return BPLUS_(private_leaf)( BPLUS_(private)( join ), iter.leaf_off )->pair + iter.pair_idx;
    1010     3751362 : }
    1011             : 
    1012             : FD_PROTOTYPES_END
    1013             : 
    1014             : #endif
    1015             : 
    1016             : #if BPLUS_IMPL_STYLE==0 || BPLUS_IMPL_STYLE==2
    1017             : 
    1018             : /* Implementation *****************************************************/
    1019             : 
    1020             : /* bplus_private_node_query returns the index of a node's child tree,
    1021             :    in [0,tree_cnt), that might contain query.
    1022             : 
    1023             :      tree 0          covers keys [ -inf,              pivot[0] )
    1024             :           i          covers keys [ pivot[i-1],        pivot[i] )
    1025             :           tree_cnt-1 covers keys [ pivot[tree_cnt-2], +inf     )
    1026             : 
    1027             :    Assumes pivot contains unique keys in ascending order, tree_cnt is in
    1028             :    [2,tree_max], tree_max <<< ULONG_MAX and query is valid. */
    1029             : 
    1030             : FD_FN_PURE static ulong
    1031             : BPLUS_(private_node_query)( BPLUS_KEY_T const * FD_RESTRICT pivot,
    1032             :                             ulong                           tree_cnt,
    1033   242154210 :                             BPLUS_KEY_T const * FD_RESTRICT query ) {
    1034   242154210 :   ulong i0 = 0UL;
    1035   242154210 :   ulong i1 = tree_cnt;
    1036             : 
    1037   452318847 :   do {
    1038             : 
    1039             :     /* At this point, query might be found in trees in [i0,i1) and this
    1040             :        range contains at least two trees.  Test the middle tree.  If it
    1041             :        matches exactly, we are done.  Otherwise, recurse on the
    1042             :        appropriate half of the range. */
    1043             : 
    1044   452318847 :     ulong im = (i0+i1) >> 1; /* No overflow, at least 1 */
    1045             : 
    1046   452318847 :     int cmp = BPLUS_(private_key_cmp)( query, &pivot[im-1UL] );
    1047   452318847 :     if( FD_UNLIKELY( !cmp ) ) return im; /* (optional) early abort, optimize for big trees */
    1048   447066750 :     i0 = fd_ulong_if( cmp<0, i0, im );
    1049   447066750 :     i1 = fd_ulong_if( cmp<0, im, i1 );
    1050             : 
    1051   447066750 :   } while( FD_LIKELY( (i1-i0)>1UL) ); /* optimize for big trees */
    1052             : 
    1053   236902113 :   return i0;
    1054   242154210 : }
    1055             : 
    1056             : /* bplus_private_pair_query returns the index of a leaf's pair, in
    1057             :    [0,pair_cnt), that exactly matches query or pair if there is no
    1058             :    matching pair.  Assumes pair keys are unique and ascending sorted,
    1059             :    pair_cnt is in [1,pair_max], pair_max <<< ULONG_MAX and query is
    1060             :    valid. */
    1061             : 
    1062             : FD_FN_PURE static ulong
    1063             : BPLUS_(private_pair_query)( BPLUS_PAIR_T const * FD_RESTRICT pair,
    1064             :                             ulong                            pair_cnt,
    1065    22530249 :                             BPLUS_KEY_T  const * FD_RESTRICT query ) {
    1066    22530249 :   ulong i0 = 0UL;
    1067    22530249 :   ulong i1 = pair_cnt;
    1068             : 
    1069    36757224 :   do {
    1070             : 
    1071             :     /* At this point, query might match one of the pairs in [i0,i1) and
    1072             :        this range is not empty.  Test the pair in the middle.  If it
    1073             :        matches, we found the pair.  Otherwise, recurse appropriate half
    1074             :        of the range (exclusive of our query). */
    1075             : 
    1076    36757224 :     ulong im = (i0+i1) >> 1; /* No overflow */
    1077             : 
    1078    36757224 :     int cmp = BPLUS_(private_key_cmp)( query, &pair[im].BPLUS_PAIR_KEY );
    1079    36757224 :     if( FD_UNLIKELY( !cmp ) ) return im; /* Found, optimize for big trees */
    1080    23600799 :     i0 = fd_ulong_if( cmp<0, i0, im+1UL );
    1081    23600799 :     i1 = fd_ulong_if( cmp<0, im, i1     );
    1082             : 
    1083    23600799 :   } while( FD_LIKELY( i1-i0 ) ); /* optimize for big trees */
    1084             : 
    1085     9373824 :   return pair_cnt; /* not found */
    1086    22530249 : }
    1087             : 
    1088             : /* bplus_private_child_insert inserts a child at position child_idx into
    1089             :    parent.  Parent should have a tree_cnt in [1,tree_max-1] and
    1090             :    child_idx should be in [1,tree_cnt] (such that the child is never
    1091             :    inserted into a parent with no children or a parent with the maximum
    1092             :    number of children and is never inserted as the first born child).
    1093             :    child_off is the bplus global offset of the child.  This can be a
    1094             :    node or leaf but it should match parent's current children.
    1095             :    child_pivot is the pivot value associated with the child and the
    1096             :    child_idx should preserve the parent's pivot sorting.  Further, child
    1097             :    should not contain any keys that outside the parent's pivot range
    1098             :    after the insert. */
    1099             : 
    1100             : static void
    1101             : BPLUS_(private_child_insert)( BPLUS_(private_node_t) * FD_RESTRICT parent,
    1102             :                               ulong                                child_idx,
    1103             :                               ulong                                child_off,
    1104     1198194 :                               BPLUS_KEY_T const      * FD_RESTRICT child_pivot ) {
    1105     1198194 :   ulong                     tree_cnt = parent->tree_cnt;
    1106     1198194 :   ulong       * FD_RESTRICT tree_off = parent->tree_off;
    1107     1198194 :   BPLUS_KEY_T * FD_RESTRICT pivot    = parent->pivot;
    1108             : 
    1109             :   /* Make room for child at child_idx by shifting childen currently at
    1110             :      or after child_idx up one. */
    1111             : 
    1112     2885352 :   for( ulong sibling_idx=tree_cnt; sibling_idx>child_idx; sibling_idx-- ) {
    1113     1687158 :     tree_off[sibling_idx    ] = tree_off[sibling_idx-1UL];
    1114     1687158 :     pivot   [sibling_idx-1UL] = pivot   [sibling_idx-2UL];
    1115     1687158 :   }
    1116             : 
    1117             :   /* Insert the child at child_idx */
    1118             : 
    1119     1198194 :   tree_off[child_idx    ] = child_off;
    1120     1198194 :   pivot   [child_idx-1UL] = child_pivot[0];
    1121             : 
    1122     1198194 :   parent->tree_cnt = tree_cnt + 1UL; /* In [2,tree_max] */
    1123     1198194 : }
    1124             : 
    1125             : /* bplus_private_child_remove removes the child child_idx from the bplus
    1126             :    node parent.  Assumes parent is valid with a tree cnt in [2,tree_max]
    1127             :    and that child is in [1,tree_cnt) (as such, this will never remove
    1128             :    the first born child). */
    1129             : 
    1130             : static void
    1131             : BPLUS_(private_child_remove)( BPLUS_(private_node_t) * FD_RESTRICT parent,
    1132     1193622 :                               ulong                                child_idx ) {
    1133     1193622 :   ulong                     tree_cnt = parent->tree_cnt;
    1134     1193622 :   ulong       * FD_RESTRICT tree_off = parent->tree_off;
    1135     1193622 :   BPLUS_KEY_T * FD_RESTRICT pivot    = parent->pivot;
    1136             : 
    1137             :   /* Fill the hole at child_idx by shifting childen currently at or
    1138             :      after child_idx down one. */
    1139             : 
    1140     1193622 :   tree_cnt--;
    1141     2689209 :   for( ulong sibling_idx=child_idx; sibling_idx<tree_cnt; sibling_idx++ ) {
    1142     1495587 :     tree_off[sibling_idx    ] = tree_off[sibling_idx+1UL];
    1143     1495587 :     pivot   [sibling_idx-1UL] = pivot   [sibling_idx    ];
    1144     1495587 :   }
    1145             : 
    1146     1193622 :   parent->tree_cnt = tree_cnt; /* In [1,tree_max-1] */
    1147     1193622 : }
    1148             : 
    1149             : ulong
    1150          18 : BPLUS_(leaf_max_est)( ulong ele_max_est ) {
    1151             : 
    1152             :   /* No leaves needed for always empty trees */
    1153             : 
    1154          18 :   if( FD_UNLIKELY( !ele_max_est ) ) return 0UL;
    1155             : 
    1156             :   /* Trivial bplus trees have just a root leaf */
    1157             : 
    1158          12 :   if( FD_UNLIKELY( ele_max_est<=BPLUS_(private_pair_max)() ) ) return 1UL;
    1159             : 
    1160             :   /* In a non-trivial bplus tree, each leaf has at least
    1161             :      pair_min==pair_max/2 elements.  So, we require:
    1162             : 
    1163             :           leaf_max*pair_min >= ele_max_est
    1164             :        -> leaf_max >= ele_max_est / pair_min
    1165             : 
    1166             :      The smallest leaf_max that satisfies this is:
    1167             : 
    1168             :           ceil( ele_max_est / pair_min )
    1169             :        -> floor( (ele_max_est + pair_min - 1) / pair_min )
    1170             :        -> 1 + floor( (ele_max_est - 1) / pair_min */
    1171             : 
    1172           6 :   return 1UL + ((ele_max_est-1UL) / BPLUS_(private_pair_min)()); /* No overflow */
    1173          12 : }
    1174             : 
    1175             : ulong
    1176           9 : BPLUS_(node_max_est)( ulong ele_max_est ) {
    1177             : 
    1178             :   /* Start at the leaf layer with leaf_max trees */
    1179             : 
    1180           9 :   ulong node_max = 0UL;
    1181           9 :   ulong tree_cnt = BPLUS_(leaf_max_est)( ele_max_est );
    1182             : 
    1183          30 :   while( tree_cnt>1UL ) {
    1184             : 
    1185             :     /* At this point, we have more than one tree in the current layer.
    1186             :        To reduce the number of trees, we create a new layer of nodes
    1187             :        above it and make each new node responsible for up to
    1188             :        tree_min==tree_max/2 of the trees in the current layer to give a
    1189             :        reasonably tight bound to the worst case.  That implies this new
    1190             :        layer will need at most:
    1191             : 
    1192             :             ceil( tree_cnt / tree_min )
    1193             :          -> floor( (tree_cnt + tree_min - 1) / tree_min )
    1194             :          -> 1 + floor( (tree_cnt - 1) / tree_min )
    1195             : 
    1196             :        nodes and this layer will reduce to the number of trees to the
    1197             :        same number. */
    1198             : 
    1199          21 :     tree_cnt = 1UL + ((tree_cnt-1UL) / BPLUS_(private_tree_min)()); /* No overflow */
    1200          21 :     node_max += tree_cnt;
    1201             : 
    1202          21 :   }
    1203             : 
    1204           9 :   return node_max;
    1205           9 : }
    1206             : 
    1207             : ulong
    1208           3 : BPLUS_(align)( void ) {
    1209           3 :   return BPLUS_ALIGN;
    1210           3 : }
    1211             : 
    1212             : ulong
    1213             : BPLUS_(footprint)( ulong node_max,
    1214          18 :                    ulong leaf_max ) {
    1215             : 
    1216          18 :   if( FD_UNLIKELY( (node_max > BPLUS_(private_node_max_max)()) | (leaf_max > BPLUS_(private_leaf_max_max)()) ) ) return 0UL;
    1217             : 
    1218             :   /* At this point, the needed node and leaf storage is at most 2^63,
    1219             :      which is impractically large but also with plenty of room left over
    1220             :      for the metadata and remaining alignment padding. */
    1221             : 
    1222           6 :   ulong off = 0UL;                                  /**/                     off +=          sizeof( BPLUS_(private_t)      );
    1223           6 :   off = fd_ulong_align_up( off, BPLUS_NODE_ALIGN ); /*ulong node_lo = off;*/ off += node_max*sizeof( BPLUS_(private_node_t) );
    1224           6 :   off = fd_ulong_align_up( off, BPLUS_LEAF_ALIGN ); /*ulong leaf_lo = off;*/ off += leaf_max*sizeof( BPLUS_(private_leaf_t) );
    1225           6 :   off = fd_ulong_align_up( off, BPLUS_ALIGN );
    1226             : 
    1227           6 :   return off;
    1228          18 : }
    1229             : 
    1230             : void
    1231           6 : BPLUS_(flush)( BPLUS_(t) * bplus ) {
    1232           6 :   bplus->node_pool_off = 0UL;
    1233           6 :   bplus->leaf_pool_off = 0UL;
    1234           6 :   bplus->root_off      = 0UL;
    1235           6 :   bplus->leaf_min_off  = 0UL;
    1236           6 :   bplus->leaf_max_off  = 0UL;
    1237             : 
    1238           6 :   BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, bplus->node_lo );
    1239        6162 :   for( ulong node_rem=bplus->node_max; node_rem; node_rem-- ) BPLUS_(private_node_release)( bplus, &node[ node_rem-1UL ] );
    1240             : 
    1241           6 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, bplus->leaf_lo );
    1242       12294 :   for( ulong leaf_rem=bplus->leaf_max; leaf_rem; leaf_rem-- ) BPLUS_(private_leaf_release)( bplus, &leaf[ leaf_rem-1UL ] );
    1243           6 : }
    1244             : 
    1245             : void *
    1246             : BPLUS_(new)( void * shmem,
    1247             :              ulong  node_max,
    1248          15 :              ulong  leaf_max ) {
    1249          15 :   BPLUS_(private_t) * bplus = (BPLUS_(private_t) *)shmem;
    1250             : 
    1251          15 :   if( FD_UNLIKELY( !bplus ) ) {
    1252           3 :     FD_LOG_WARNING(( "NULL shmem" ));
    1253           3 :     return NULL;
    1254           3 :   }
    1255             : 
    1256          12 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) ) ) {
    1257           3 :     FD_LOG_WARNING(( "misaligned shmem" ));
    1258           3 :     return NULL;
    1259           3 :   }
    1260             : 
    1261           9 :   ulong footprint = BPLUS_(footprint)( node_max, leaf_max );
    1262           9 :   if( FD_UNLIKELY( !footprint ) ) {
    1263           6 :     FD_LOG_WARNING(( "bad node_max and/or leaf_max" ));
    1264           6 :     return NULL;
    1265           6 :   }
    1266             : 
    1267             :   /* Note: it is the caller's responsibility to clear the memory because
    1268             :      it is potentially very big and very time consuming to do so and may
    1269             :      already have been cleared (e.g. mmap from the OS) */
    1270             : 
    1271           3 :   ulong off;
    1272           3 :   off = 0UL;                                        /**/                 off +=          sizeof( BPLUS_(private_t)      );
    1273           3 :   off = fd_ulong_align_up( off, BPLUS_NODE_ALIGN ); ulong node_lo = off; off += node_max*sizeof( BPLUS_(private_node_t) );
    1274           3 :   off = fd_ulong_align_up( off, BPLUS_LEAF_ALIGN ); ulong leaf_lo = off; off += leaf_max*sizeof( BPLUS_(private_leaf_t) );
    1275           3 :   off = fd_ulong_align_up( off, BPLUS_ALIGN );
    1276             : 
    1277           3 :   bplus->node_max      = node_max; bplus->leaf_max      = leaf_max;
    1278           3 :   bplus->node_lo       = node_lo;  bplus->leaf_lo       = leaf_lo;
    1279             : 
    1280           3 :   BPLUS_(flush)( bplus );
    1281             : 
    1282           3 :   FD_COMPILER_MFENCE();
    1283           3 :   bplus->magic = BPLUS_MAGIC;
    1284           3 :   FD_COMPILER_MFENCE();
    1285             : 
    1286           3 :   return shmem;
    1287           9 : }
    1288             : 
    1289             : BPLUS_(t) *
    1290          12 : BPLUS_(join)( void * shbplus ) {
    1291          12 :   BPLUS_(private_t) * bplus = (BPLUS_(private_t) *)shbplus;
    1292             : 
    1293          12 :   if( FD_UNLIKELY( !bplus ) ) {
    1294           3 :     FD_LOG_WARNING(( "NULL shbplus" ));
    1295           3 :     return NULL;
    1296           3 :   }
    1297             : 
    1298           9 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) ) ) {
    1299           3 :     FD_LOG_WARNING(( "misaligned shbplus" ));
    1300           3 :     return NULL;
    1301           3 :   }
    1302             : 
    1303           6 :   if( FD_UNLIKELY( bplus->magic!=BPLUS_MAGIC ) ) {
    1304           3 :     FD_LOG_WARNING(( "bad magic" ));
    1305           3 :     return NULL;
    1306           3 :   }
    1307             : 
    1308           3 :   return (BPLUS_(t) *)bplus;
    1309           6 : }
    1310             : 
    1311             : void *
    1312           6 : BPLUS_(leave)( BPLUS_(t) * join ) {
    1313           6 :   if( FD_UNLIKELY( !join ) ) {
    1314           3 :     FD_LOG_WARNING(( "NULL join" ));
    1315           3 :     return NULL;
    1316           3 :   }
    1317             : 
    1318           3 :   return (void *)join;
    1319           6 : }
    1320             : 
    1321             : void *
    1322          12 : BPLUS_(delete)( void * shbplus ) {
    1323          12 :   BPLUS_(private_t) * bplus = (BPLUS_(private_t) *)shbplus;
    1324             : 
    1325          12 :   if( FD_UNLIKELY( !bplus ) ) {
    1326           3 :     FD_LOG_WARNING(( "NULL shbplus" ));
    1327           3 :     return NULL;
    1328           3 :   }
    1329             : 
    1330           9 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) ) ) {
    1331           3 :     FD_LOG_WARNING(( "misaligned shbplus" ));
    1332           3 :     return NULL;
    1333           3 :   }
    1334             : 
    1335           6 :   if( FD_UNLIKELY( bplus->magic!=BPLUS_MAGIC ) ) {
    1336           3 :     FD_LOG_WARNING(( "bad magic" ));
    1337           3 :     return NULL;
    1338           3 :   }
    1339             : 
    1340           3 :   FD_COMPILER_MFENCE();
    1341           3 :   bplus->magic = 0UL;
    1342           3 :   FD_COMPILER_MFENCE();
    1343             : 
    1344           3 :   return (void *)bplus;
    1345           6 : }
    1346             : 
    1347             : BPLUS_PAIR_T const *
    1348             : BPLUS_(query_const)( BPLUS_(t)   const * join,
    1349    16889784 :                      BPLUS_KEY_T const * query ) {
    1350    16889784 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
    1351             : 
    1352             :   /* If an empty bplus tree, not found */
    1353             : 
    1354    16889784 :   ulong tree_off = bplus->root_off;
    1355    16889784 :   if( FD_UNLIKELY( !tree_off ) ) return NULL; /* optimize for big trees */
    1356             : 
    1357             :   /* At this point, the bplus tree is not empty.  Find the leaf that
    1358             :      might contain query. */
    1359             : 
    1360    16889409 :   ulong leaf_lo = bplus->leaf_lo;
    1361   107671575 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* optimize for big trees */
    1362    90782166 :     BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    1363    90782166 :     tree_off = node->tree_off[ BPLUS_(private_node_query)( node->pivot, node->tree_cnt, query ) ];
    1364    90782166 :   }
    1365    16889409 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, tree_off );
    1366             : 
    1367             :   /* At this point, leaf might contain query.  Query the leaf */
    1368             : 
    1369    16889409 :   pair_t const * pair     = leaf->pair;
    1370    16889409 :   ulong          pair_cnt = leaf->pair_cnt;
    1371    16889409 :   ulong          pair_idx = BPLUS_(private_pair_query)( pair, pair_cnt, query );
    1372             : 
    1373    16889409 :   return fd_ptr_if( pair_idx<pair_cnt, &pair[ pair_idx ], NULL );
    1374    16889784 : }
    1375             : 
    1376             : BPLUS_PAIR_T *
    1377             : BPLUS_(private_insert)( BPLUS_(t) *         join,
    1378             :                         BPLUS_KEY_T const * key,
    1379             :                         int                 upsert,
    1380     7525116 :                         int *               _insert ) {
    1381     7525116 :   BPLUS_(private_t) * bplus = BPLUS_(private)( join );
    1382             : 
    1383             :   /* If the bplus tree is empty, create the root leaf and insert the key
    1384             :      into it */
    1385             : 
    1386     7525116 :   ulong tree_off = bplus->root_off;
    1387     7525116 :   if( FD_UNLIKELY( !tree_off ) ) { /* Empty bplus, optimize for big */
    1388             : 
    1389         189 :     BPLUS_(private_leaf_t) * root = BPLUS_(private_leaf_acquire)( bplus );
    1390         189 :     if( FD_UNLIKELY( !root ) ) return NULL; /* no room for insert */
    1391             : 
    1392         189 :     root->prev_off               = 0UL;
    1393         189 :     root->next_off               = 0UL;
    1394         189 :     root->pair_cnt               = 1UL;
    1395         189 :     root->pair[0].BPLUS_PAIR_KEY = key[0];
    1396         189 :     ulong root_off = BPLUS_(private_off)( bplus, root );
    1397         189 :     bplus->root_off     = root_off;
    1398         189 :     bplus->leaf_min_off = root_off;
    1399         189 :     bplus->leaf_max_off = root_off;
    1400             : 
    1401         189 :     *_insert = 1;
    1402         189 :     return &root->pair[0];
    1403             : 
    1404         189 :   }
    1405             : 
    1406             :   /* At this point, the bplus tree is not empty.  We recurse through
    1407             :      interior nodes to find the leaf that should hold key, splitting
    1408             :      interior nodes as we go. */
    1409             : 
    1410     7524927 :   ulong tree_min = BPLUS_(private_tree_min)();
    1411     7524927 :   ulong tree_max = BPLUS_(private_tree_max)(); /* ==tree_min*2 */
    1412             : 
    1413     7524927 :   BPLUS_(private_node_t) * parent    = NULL;
    1414     7524927 :   ulong                    child_idx = 0UL;
    1415             : 
    1416     7524927 :   ulong leaf_lo = bplus->leaf_lo;
    1417    47962443 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* Optimize for big trees */
    1418    40437516 :     BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, tree_off );
    1419             : 
    1420             :     /* At this point, we should insert key into one of the node's trees
    1421             :        and tree_cnt is in [2,tree_max] (root) or [tree_min,tree_max]
    1422             :        (non-root).  If the node has a parent, parent and all node's
    1423             :        siblings are nodes and parent has in [2,tree_max-1] (root parent)
    1424             :        or [tree_min,tree_max-1] (non-root parent) children.  (tree_max-1
    1425             :        because if it had tree_max children when insert started, we would
    1426             :        have split it on the previous iteration).
    1427             : 
    1428             :        If the node is full, split it. */
    1429             : 
    1430    40437516 :     ulong tree_cnt = node->tree_cnt;
    1431    40437516 :     if( FD_UNLIKELY( tree_cnt==tree_max ) ) { /* Optimize for high radix */
    1432             : 
    1433             :       /* Acquire resources.  If node is the root, this includes making a
    1434             :          new root node and making the new root node's parent. */
    1435             : 
    1436      231162 :       BPLUS_(private_node_t) * new_node = BPLUS_(private_node_acquire)( bplus );
    1437      231162 :       if( FD_UNLIKELY( !new_node ) ) return NULL; /* No room for insert */
    1438             : 
    1439      231162 :       if( FD_UNLIKELY( !parent ) ) {
    1440         714 :         parent = BPLUS_(private_node_acquire)( bplus );
    1441         714 :         if( FD_UNLIKELY( !parent ) ) {
    1442           0 :           BPLUS_(private_node_release)( bplus, new_node );
    1443           0 :           return NULL; /* No room for insert */
    1444           0 :         }
    1445             : 
    1446         714 :         bplus->root_off = BPLUS_(private_off)( bplus, parent );
    1447             : 
    1448         714 :         parent->tree_cnt    = 1UL; /* Will be incremented to 2 by the child_insert below. */
    1449         714 :         parent->tree_off[0] = BPLUS_(private_off)( bplus, node );
    1450             : 
    1451         714 :         child_idx = 0UL;
    1452         714 :       }
    1453             : 
    1454             :       /* At this point, node is child child_idx of parent and we need to
    1455             :          split node.  Further, new_node is the node that will be created
    1456             :          by the split and parent has room to insert a link to new_node.
    1457             :          Split node evenly into new_node and update the parent
    1458             :          accordingly. */
    1459             : 
    1460      231162 :       BPLUS_KEY_T const * median = &node->pivot[ tree_min-1UL ];
    1461             : 
    1462      231162 :       node->tree_cnt = tree_min;
    1463             : 
    1464      231162 :       new_node->tree_cnt = tree_min;
    1465      231162 :       memcpy( new_node->tree_off, node->tree_off + tree_min, sizeof(ulong)      * tree_min      );
    1466      231162 :       memcpy( new_node->pivot,    node->pivot    + tree_min, sizeof(BPLUS_KEY_T)*(tree_min-1UL) );
    1467             : 
    1468      231162 :       BPLUS_(private_child_insert)( parent, child_idx+1UL, BPLUS_(private_off)( bplus, new_node ), median );
    1469             : 
    1470             :       /* Move into the appropriate split */
    1471             : 
    1472      231162 :       node     = fd_ptr_if( BPLUS_(private_key_cmp)( key, median )<0, node, new_node );
    1473      231162 :       tree_cnt = tree_min;
    1474      231162 :     }
    1475             : 
    1476             :     /* At this point, we should insert key into one of the node's trees
    1477             :        and tree_cnt is in [2,tree_max-1] (root) or [tree_min,tree_max-1]
    1478             :        (non root) such that we are guaranteed to be able to insert. */
    1479             : 
    1480    40437516 :     parent    = node;
    1481    40437516 :     child_idx = BPLUS_(private_node_query)( node->pivot, tree_cnt, key );
    1482    40437516 :     tree_off  = node->tree_off[ child_idx ];
    1483    40437516 :   }
    1484             : 
    1485     7524927 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, tree_off );
    1486             : 
    1487             :   /* At this point, we'd like to insert key into leaf.  But if leaf is
    1488             :      full, we split it to make room. */
    1489             : 
    1490     7524927 :   ulong pair_min = BPLUS_(private_pair_min)();
    1491     7524927 :   ulong pair_max = BPLUS_(private_pair_max)(); /* ==pair_min*2 */
    1492             : 
    1493     7524927 :   ulong pair_cnt = (ulong)leaf->pair_cnt;
    1494     7524927 :   if( FD_UNLIKELY( pair_cnt==pair_max ) ) { /* optimize for high radix */
    1495             : 
    1496             :     /* Acquire resources.  If leaf is the root, this includes making a
    1497             :        new root node and making the new root node's parent. */
    1498             : 
    1499      967035 :     BPLUS_(private_leaf_t) * new_leaf = BPLUS_(private_leaf_acquire)( bplus );
    1500      967035 :     if( FD_UNLIKELY( !new_leaf ) ) return NULL; /* No room for insert */
    1501             : 
    1502      967032 :     if( FD_UNLIKELY( !parent ) ) {
    1503         159 :       parent = BPLUS_(private_node_acquire)( bplus );
    1504         159 :       if( FD_UNLIKELY( !parent ) ) {
    1505           0 :         BPLUS_(private_leaf_release)( bplus, new_leaf );
    1506           0 :         return NULL; /* No room to insert */
    1507           0 :       }
    1508             : 
    1509         159 :       bplus->root_off = BPLUS_(private_off)( bplus, parent );
    1510             : 
    1511         159 :       parent->tree_cnt    = 1UL; /* Will be incremented to 2 below */
    1512         159 :       parent->tree_off[0] = BPLUS_(private_off)( bplus, leaf );
    1513             : 
    1514         159 :       child_idx = 0UL;
    1515         159 :     }
    1516             : 
    1517             :     /* At this point, leaf is child child_idx of parent and we need to
    1518             :        split leaf.  Further, new_leaf is the leaf that will be created
    1519             :        by the split and parent has room to insert a link to new_leaf.
    1520             :        Split leaf evenly into new_leaf and update the parent
    1521             :        accordingly.  Splitting this leaf might make a new max leaf (it
    1522             :        will never make a new min leaf). */
    1523             : 
    1524      967032 :     BPLUS_KEY_T const * median = &leaf->pair[ pair_min ].BPLUS_PAIR_KEY;
    1525             : 
    1526      967032 :     ulong next_off = leaf->next_off;
    1527             : 
    1528      967032 :     leaf->pair_cnt = pair_min;
    1529      967032 :     leaf->next_off = BPLUS_(private_off)( bplus, new_leaf );
    1530             : 
    1531      967032 :     new_leaf->pair_cnt = pair_min;
    1532      967032 :     new_leaf->prev_off = BPLUS_(private_off)( bplus, leaf );
    1533      967032 :     new_leaf->next_off = next_off;
    1534      967032 :     memcpy( &new_leaf->pair[0], &leaf->pair[pair_min], sizeof(pair_t)*pair_min );
    1535             : 
    1536             :     /* FIXME: BRANCHLESS? */
    1537      967032 :     ulong new_leaf_off = BPLUS_(private_off)( bplus, new_leaf );
    1538      967032 :     if( FD_UNLIKELY( !next_off ) ) bplus->leaf_max_off                               = new_leaf_off;
    1539      963618 :     else                           BPLUS_(private_leaf)( bplus, next_off )->prev_off = new_leaf_off;
    1540             : 
    1541      967032 :     BPLUS_(private_child_insert)( parent, child_idx+1UL, new_leaf_off, median );
    1542             : 
    1543             :     /* Move into the appropriate split */
    1544             : 
    1545      967032 :     leaf     = (BPLUS_(private_key_cmp)( key, median )<0) ? leaf : new_leaf;
    1546      967032 :     pair_cnt = pair_min;
    1547      967032 :   }
    1548             : 
    1549             :   /* At this point, leaf either contains key or is where we should
    1550             :      insert key.  Further, pair_cnt is in [1,pair_max-1] (root) or
    1551             :      [pair_min,pair_max-1] (non root).  Search for key in the leaf.  If
    1552             :      key is not in the leaf, the search will reveal where to put the
    1553             :      key. */
    1554             : 
    1555     7524924 :   BPLUS_PAIR_T * pair = leaf->pair;
    1556     7524924 :   ulong          i0   = 0UL;
    1557     7524924 :   ulong          i1   = pair_cnt;
    1558    12506436 :   do {
    1559             : 
    1560             :     /* At this point, pairs in [0,i0) are before key, pairs in
    1561             :        [i1,pair_cnt) are after key and pairs in [i0,i1) (non-empty) are
    1562             :        not known.  Probe the middle of this range for key. */
    1563             : 
    1564    12506436 :     ulong im = (i0+i1) >> 1; /* no overflow */
    1565             : 
    1566    12506436 :     int cmp = BPLUS_(private_key_cmp)( &pair[ im ].BPLUS_PAIR_KEY, key );
    1567             : 
    1568             :     /* If cmp==0, pair im holds the key and we are done.  Otherwise, if
    1569             :        cmp<0 / cmp>0, pair im is before / after key.  We adjust the
    1570             :        ranges appropriately and recurse. */
    1571             : 
    1572    12506436 :     if( FD_UNLIKELY( !cmp ) ) { /* optimize for big trees */
    1573     3752997 :       leaf->pair_cnt = pair_cnt;
    1574     3752997 :       if( !upsert ) return NULL; /* compile time */
    1575     1875972 :       *_insert = 0;
    1576     1875972 :       return &pair[ im ];
    1577     3752997 :     }
    1578     8753439 :     i0 = fd_ulong_if( cmp>0, i0, im+1UL );
    1579     8753439 :     i1 = fd_ulong_if( cmp>0, im, i1     );
    1580             : 
    1581     8753439 :   } while( i1>i0 );
    1582             : 
    1583             :   /* At this point, leaf does not contain key, pairs [0,i0) are before
    1584             :      key, pairs [i0,pair_cnt) are after key and we have room for key.
    1585             :      Move pairs [i0,pair_cnt) right 1 to make room and insert the key at
    1586             :      pair i0. */
    1587             : 
    1588     3771927 :   memmove( pair+i0+1UL, pair+i0, (pair_cnt-i0)*sizeof(BPLUS_PAIR_T) );
    1589     3771927 :   pair[ i0 ].BPLUS_PAIR_KEY = key[0];
    1590     3771927 :   leaf->pair_cnt = pair_cnt + 1UL;
    1591     3771927 :   *_insert = 1;
    1592     3771927 :   return &pair[ i0 ];
    1593     7524924 : }
    1594             : 
    1595             : int
    1596             : BPLUS_(remove_key)( BPLUS_(t)         * join,
    1597     5640936 :                     BPLUS_KEY_T const * key ) {
    1598     5640936 :   BPLUS_(private_t) * bplus = BPLUS_(private)( join );
    1599             : 
    1600             :   /* If tree is empty, nothing to remove */
    1601             : 
    1602     5640936 :   ulong tree_off = bplus->root_off;
    1603     5640936 :   if( FD_UNLIKELY( !tree_off ) ) return -1; /* not found, optimize for found */
    1604             : 
    1605             :   /* At this point, the tree is not empty.  Find the path through the
    1606             :      tree to the leaf with the key to remove.  Note that 128 is more
    1607             :      than enough given strong lg N depth algorithmic guarantees and wide
    1608             :      radices. */
    1609             : 
    1610     5640840 :   BPLUS_(private_node_t) * path_node    [ 128 ];
    1611     5640840 :   ulong                    path_tree_idx[ 128 ];
    1612     5640840 :   ulong                    path_cnt = 0UL;
    1613             : 
    1614     5640840 :   ulong leaf_lo = bplus->leaf_lo;
    1615    35960184 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* optimize for big trees */
    1616    30319344 :     BPLUS_(private_node_t) * node = BPLUS_(private_node)( bplus, tree_off );
    1617             : 
    1618    30319344 :     ulong tree_idx = BPLUS_(private_node_query)( node->pivot, node->tree_cnt, key );
    1619             : 
    1620    30319344 :     path_node    [ path_cnt ] = node;
    1621    30319344 :     path_tree_idx[ path_cnt ] = tree_idx;
    1622    30319344 :     path_cnt++;
    1623             : 
    1624    30319344 :     tree_off = node->tree_off[ tree_idx ];
    1625    30319344 :   }
    1626             : 
    1627     5640840 :   BPLUS_(private_leaf_t) * leaf = BPLUS_(private_leaf)( bplus, tree_off );
    1628             : 
    1629             :   /* At this point, leaf might contain key.  Search for key. */
    1630             : 
    1631     5640840 :   BPLUS_PAIR_T * pair     = leaf->pair;
    1632     5640840 :   ulong          pair_cnt = leaf->pair_cnt;
    1633     5640840 :   ulong          pair_idx = BPLUS_(private_pair_query)( pair, pair_cnt, key );
    1634             : 
    1635     5640840 :   if( FD_UNLIKELY( pair_idx>=pair_cnt ) ) return -1; /* not found, optimize for found */
    1636             : 
    1637             :   /* At this point, pair[ pair_idx ] is the pair to remove.  Remove it. */
    1638             : 
    1639     3763473 :   pair_cnt--;
    1640     6947751 :   for( ulong idx=pair_idx; idx<pair_cnt; idx++ ) pair[idx] = pair[idx+1UL];
    1641     3763473 :   leaf->pair_cnt = pair_cnt; /* FIXME: MOVE BELOW? */
    1642             : 
    1643             :   /* At this point, the leaf might be unbalanced but everything else in
    1644             :      the bplus tree is balanced. */
    1645             : 
    1646     3763473 :   if( FD_UNLIKELY( !path_cnt ) ) { /* optimize for big trees */
    1647             : 
    1648             :     /* At this point, we removed a pair from the root leaf and the
    1649             :        leaf's pair_cnt is in [0,pair_max-1] .  If there are still pairs
    1650             :        in the leaf, the bplus tree is still balanced and we are done.
    1651             :        Otherwise, we release the leaf and make an empty bplus tree
    1652             :        (which is balanced by definition). */
    1653             : 
    1654         561 :     if( FD_LIKELY( pair_cnt ) ) return 0; /* optimize for big trees */
    1655         186 :     bplus->root_off     = 0UL;
    1656         186 :     bplus->leaf_min_off = 0UL;
    1657         186 :     bplus->leaf_max_off = 0UL;
    1658         186 :     BPLUS_(private_leaf_release)( bplus, leaf );
    1659         186 :     return 0;
    1660             : 
    1661         561 :   }
    1662             : 
    1663             :   /* At this point, we removed a pair from a non-root leaf and the
    1664             :      leaf's pair_cnt is in [pair_min-1,pair_max-1].  If there are at
    1665             :      least pair_min pairs left in the leaf, the bplus tree is still
    1666             :      balanced and we are done. */
    1667             : 
    1668     3762912 :   ulong pair_min = BPLUS_(private_pair_min)();
    1669     3762912 :   ulong pair_max = BPLUS_(private_pair_max)();
    1670             : 
    1671     3762912 :   if( FD_LIKELY( pair_cnt>=pair_min ) ) return 0; /* optimize for big trees */
    1672             : 
    1673             :   /* At this point, we removed a pair from a non-root leaf and its
    1674             :      pair_cnt is pair_min-1.  As such, it is not balanced with its
    1675             :      siblings (leaf must have at least leaf_min-1 siblings that must
    1676             :      also be leaves with a pair_cnt in [pair_min,pair_max]).  Determine
    1677             :      which sibling to use for rebalancing and how to rebalance with this
    1678             :      sibling.  This sibling will have a pair cnt in [pair_min,pair_max].
    1679             : 
    1680             :      Note: Could be more adaptive here (e.g. pick the larger sibling
    1681             :      when leaf is a middle child). */
    1682             : 
    1683     1660878 :   path_cnt--;
    1684     1660878 :   BPLUS_(private_node_t) * parent    = path_node    [ path_cnt ];
    1685     1660878 :   ulong                    child_idx = path_tree_idx[ path_cnt ];
    1686             : 
    1687     1660878 :   ulong sib0_idx = child_idx - (ulong)(child_idx>0UL);
    1688     1660878 :   ulong sib1_idx = sib0_idx  + 1UL;
    1689             : 
    1690     1660878 :   ulong sib0_off = parent->tree_off[ sib0_idx ];
    1691     1660878 :   ulong sib1_off = parent->tree_off[ sib1_idx ];
    1692             : 
    1693     1660878 :   BPLUS_(private_leaf_t) * sib0 = BPLUS_(private_leaf)( bplus, sib0_off );
    1694     1660878 :   BPLUS_(private_leaf_t) * sib1 = BPLUS_(private_leaf)( bplus, sib1_off );
    1695             : 
    1696     1660878 :   ulong sib0_pair_cnt = sib0->pair_cnt;
    1697     1660878 :   ulong sib1_pair_cnt = sib1->pair_cnt;
    1698             : 
    1699     1660878 :   ulong reb_pair_cnt = sib0_pair_cnt + sib1_pair_cnt; /* in [pair_max-1,2*pair_max-1]. */
    1700     1660878 :   if( FD_LIKELY( reb_pair_cnt>=pair_max ) ) {
    1701             : 
    1702             :     /* At this point, reb_pair_cnt is in [pair_max,2*pair_max-1].
    1703             :        Divide these as evenly as possible between sib0 and sib1 and
    1704             :        update the parent's pivot accordingly.  Since we do not remove
    1705             :        any trees from the parent, this will rebalance the whole bplus
    1706             :        tree fully and we are done. */
    1707             : 
    1708      697206 :     ulong new_sib0_pair_cnt = reb_pair_cnt >> 1;
    1709      697206 :     ulong new_sib1_pair_cnt = reb_pair_cnt - new_sib0_pair_cnt;
    1710             : 
    1711      697206 :     if( new_sib0_pair_cnt>sib0_pair_cnt ) { /* Shift pairs from sib1 into sib0 */
    1712             : 
    1713      167931 :       ulong delta = new_sib0_pair_cnt - sib0_pair_cnt;
    1714      167931 :       memcpy ( sib0->pair + sib0_pair_cnt, sib1->pair,         sizeof(BPLUS_PAIR_T)*delta             );
    1715      167931 :       memmove( sib1->pair,                 sib1->pair + delta, sizeof(BPLUS_PAIR_T)*new_sib1_pair_cnt );
    1716             : 
    1717      529275 :     } else { /* Shift pairs from sib0 into sib1 */
    1718             : 
    1719      529275 :       ulong delta = sib0_pair_cnt - new_sib0_pair_cnt;
    1720      529275 :       memmove( sib1->pair + delta, sib1->pair,                     sizeof(BPLUS_PAIR_T)*sib1_pair_cnt );
    1721      529275 :       memcpy ( sib1->pair,         sib0->pair + new_sib0_pair_cnt, sizeof(BPLUS_PAIR_T)*delta         );
    1722             : 
    1723      529275 :     }
    1724             : 
    1725      697206 :     sib0->pair_cnt = new_sib0_pair_cnt;
    1726      697206 :     sib1->pair_cnt = new_sib1_pair_cnt;
    1727             : 
    1728      697206 :     parent->pivot[sib0_idx] = sib1->pair[0].BPLUS_PAIR_KEY;
    1729      697206 :     return 0;
    1730      697206 :   }
    1731             : 
    1732             :   /* At this point, reb_pair_cnt is pair_max-1 such that these siblings
    1733             :      must be merged to restore balance among the leaves.  This might
    1734             :      change the leaf max from sib1 to sib0. */
    1735             : 
    1736      963672 :   memcpy( sib0->pair + sib0_pair_cnt, sib1->pair, sizeof(BPLUS_PAIR_T)*sib1_pair_cnt );
    1737      963672 :   sib0->pair_cnt = reb_pair_cnt;
    1738             : 
    1739      963672 :   ulong sib2_off = sib1->next_off;
    1740      963672 :   sib0->next_off = sib2_off;
    1741             : 
    1742             :   /* FIXME: DO BRANCHLESS? */
    1743      963672 :   if( FD_UNLIKELY( !sib2_off ) ) bplus->leaf_max_off                               = sib0_off;
    1744      961158 :   else                           BPLUS_(private_leaf)( bplus, sib2_off )->prev_off = sib0_off;
    1745             : 
    1746      963672 :   BPLUS_(private_child_remove)( parent, sib1_idx );
    1747      963672 :   BPLUS_(private_leaf_release)( bplus, sib1 );
    1748             : 
    1749             :   /* The merge might have unbalance parent among its siblings.  If it
    1750             :      has not, we are done.  Otherwise, we rebalance parent among its
    1751             :      siblings.  That might unbalance the grandparent among its siblings.
    1752             :      And so on along the path potentially all the back to the bplus tree
    1753             :      root. */
    1754             : 
    1755      963672 :   ulong tree_min = BPLUS_(private_tree_min)();
    1756      963672 :   ulong tree_max = BPLUS_(private_tree_max)();
    1757             : 
    1758     1193622 :   while( FD_LIKELY( path_cnt ) ) { /* optimize for big trees */
    1759     1189584 :     BPLUS_(private_node_t) * child = parent;
    1760             : 
    1761             :     /* At this point, because we just removed a tree from child, child's
    1762             :        tree_cnt is in [tree_min-1,tree_max-1] but everything else is
    1763             :        balanced.  If the child has at least tree_min trees, the bplus
    1764             :        tree is still balanced. */
    1765             : 
    1766     1189584 :     ulong child_tree_cnt = child->tree_cnt;
    1767     1189584 :     if( FD_LIKELY( child_tree_cnt>=tree_min ) ) return 0; /* optimize for big trees */
    1768             : 
    1769             :     /* At this point, child's tree_cnt is tree_min-1.  As such, it is
    1770             :        not balanced with its siblings (child must have at least
    1771             :        leaf_min-1 siblings that must also be nodes with a tree_cnt in
    1772             :        [tree_min,tree_max]).  Determine which sibling to use for
    1773             :        rebalancing and how to rebalance.
    1774             : 
    1775             :        Note: Could be more adaptive here (e.g. pick the larger sibling
    1776             :        if a middle child). */
    1777             : 
    1778      410850 :     path_cnt--;
    1779      410850 :     parent    = path_node    [ path_cnt ];
    1780      410850 :     child_idx = path_tree_idx[ path_cnt ];
    1781             : 
    1782      410850 :     ulong sib0_idx = child_idx - (ulong)(child_idx>0UL);
    1783      410850 :     ulong sib1_idx = sib0_idx  + 1UL;
    1784             : 
    1785      410850 :     ulong sib0_off = parent->tree_off[ sib0_idx ];
    1786      410850 :     ulong sib1_off = parent->tree_off[ sib1_idx ];
    1787             : 
    1788      410850 :     BPLUS_(private_node_t) * sib0 = BPLUS_(private_node)( bplus, sib0_off );
    1789      410850 :     BPLUS_(private_node_t) * sib1 = BPLUS_(private_node)( bplus, sib1_off );
    1790             : 
    1791      410850 :     ulong sib0_tree_cnt = sib0->tree_cnt;
    1792      410850 :     ulong sib1_tree_cnt = sib1->tree_cnt;
    1793             : 
    1794      410850 :     ulong reb_tree_cnt = sib0_tree_cnt + sib1_tree_cnt; /* in [tree_max-1,2*tree_max-1]. */
    1795      410850 :     if( FD_LIKELY( reb_tree_cnt>=tree_max ) ) {
    1796             : 
    1797             :       /* At this point, reb_tree_cnt is in [tree_max,2*tree_max-1].
    1798             :          Divide these as evenly as possible between sib0 and sib1 and
    1799             :          update the parent's pivot accordingly.  Since we do not remove
    1800             :          any trees from parent, this will rebalance the whole bplus tree
    1801             :          and we are done. */
    1802             : 
    1803      180900 :       ulong new_sib0_tree_cnt = reb_tree_cnt >> 1;
    1804      180900 :       ulong new_sib1_tree_cnt = reb_tree_cnt - new_sib0_tree_cnt;
    1805             : 
    1806      180900 :       if( new_sib0_tree_cnt>sib0_tree_cnt ) { /* Shift leading sib1 trees to trailing sib0 trees */
    1807             : 
    1808       43557 :         ulong delta = new_sib0_tree_cnt - sib0_tree_cnt;
    1809       43557 :         memcpy ( sib0->tree_off + sib0_tree_cnt, sib1->tree_off,         sizeof(ulong)*delta             );
    1810       43557 :         memmove( sib1->tree_off,                 sib1->tree_off + delta, sizeof(ulong)*new_sib1_tree_cnt );
    1811             : 
    1812             :         /* Copy parent pivot and leading delta-1 sib1 pivots into sib0. */
    1813             : 
    1814       43557 :         sib0->pivot[ sib0_tree_cnt-1UL ] = parent->pivot[ sib0_idx ];
    1815       43557 :         memcpy( sib0->pivot + sib0_tree_cnt, sib1->pivot, (delta-1UL)*sizeof(BPLUS_KEY_T) );
    1816             : 
    1817             :         /* At this point, there is 1 hole in the parent pivots and
    1818             :            delta-1 holes in the leading sib1 pivots.  Copy the next sib1
    1819             :            pivot to the parent. */
    1820             : 
    1821       43557 :         parent->pivot[ sib0_idx ] = sib1->pivot[ delta-1UL ];
    1822             : 
    1823             :         /* At this point, there are delta holes in the leading sib1
    1824             :            pivots.  Shift remaining sib1 pivots down delta. */
    1825             : 
    1826       43557 :         memmove( sib1->pivot, sib1->pivot+delta, (new_sib1_tree_cnt-1UL)*sizeof(BPLUS_KEY_T) );
    1827             : 
    1828      137343 :       } else { /* Shift trailing sib0 trees to leading sib1 trees */
    1829             : 
    1830      137343 :         ulong delta = sib0_tree_cnt - new_sib0_tree_cnt;
    1831      137343 :         memmove( sib1->tree_off + delta, sib1->tree_off,                     sizeof(ulong)*sib1_tree_cnt );
    1832      137343 :         memcpy ( sib1->tree_off,         sib0->tree_off + new_sib0_tree_cnt, sizeof(ulong)*delta         );
    1833             : 
    1834             :         /* Shift sib1 pivots up delta. */
    1835             : 
    1836      137343 :         memmove( sib1->pivot+delta, sib1->pivot, (sib1_tree_cnt-1UL)*sizeof(BPLUS_KEY_T) );
    1837             : 
    1838             :         /* At this point, there are delta holes in the leading sib1
    1839             :            pivots.  Copy trailing delta-1 sib0 pivots and parent pivot
    1840             :            into sib1. */
    1841             : 
    1842      137343 :         memcpy( sib1->pivot, sib0->pivot+new_sib0_tree_cnt, (delta-1UL)*sizeof(BPLUS_KEY_T) );
    1843      137343 :         sib1->pivot[ delta-1UL ] = parent->pivot[ sib0_idx ];
    1844             : 
    1845             :         /* At this point, there is 1 hole in the parent pivot.  Copy
    1846             :            trailing sib0 pivot into parent. */
    1847             : 
    1848      137343 :         parent->pivot[ sib0_idx ] = sib0->pivot[ new_sib0_tree_cnt-1UL ];
    1849             : 
    1850      137343 :       }
    1851             : 
    1852      180900 :       sib0->tree_cnt = new_sib0_tree_cnt;
    1853      180900 :       sib1->tree_cnt = new_sib1_tree_cnt;
    1854      180900 :       return 0;
    1855      180900 :     }
    1856             : 
    1857             :     /* At this point, reb_tree_cnt is tree_max-1 such that these
    1858             :        siblings must be merged to restore balance among siblings.  Since
    1859             :        this might unbalance parent relative to its siblings, we need to
    1860             :        keep iterating. */
    1861             : 
    1862      229950 :     memcpy( sib0->tree_off + sib0_tree_cnt, sib1->tree_off, sizeof(ulong)*sib1_tree_cnt );
    1863             : 
    1864      229950 :     sib0->pivot[ sib0_tree_cnt-1UL ] = parent->pivot[ sib0_idx ];
    1865      229950 :     memcpy( sib0->pivot + sib0_tree_cnt, sib1->pivot, sizeof(BPLUS_KEY_T)*(sib1_tree_cnt-1UL) );
    1866             : 
    1867      229950 :     sib0->tree_cnt = reb_tree_cnt;
    1868             : 
    1869      229950 :     BPLUS_(private_child_remove)( parent, sib1_idx );
    1870             : 
    1871      229950 :     BPLUS_(private_node_release)( bplus, sib1 );
    1872      229950 :   }
    1873             : 
    1874             :   /* At this point, parent is the root node and we just removed a tree
    1875             :      from it.  If parent still has more than 1 tree, the bplus tree is
    1876             :      balanced and we are done.  Otherwise, we make parent's sole child
    1877             :      the new root and release parent to finish balancing the tree. */
    1878             : 
    1879        4038 :   if( FD_LIKELY( parent->tree_cnt>1UL ) ) return 0; /* optimize for big trees */
    1880             : 
    1881         855 :   bplus->root_off = parent->tree_off[ 0 ];
    1882         855 :   BPLUS_(private_node_release)( bplus, parent );
    1883         855 :   return 0;
    1884        4038 : }
    1885             : 
    1886             : BPLUS_(iter_t)
    1887             : BPLUS_(private_iter)( BPLUS_(t)   const * join,
    1888             :                       BPLUS_KEY_T const * query,
    1889    14996460 :                       int                 op ) {
    1890    14996460 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
    1891             : 
    1892    14996460 :   BPLUS_(iter_t) iter;
    1893             : 
    1894             :   /* If the bplus is empty or query is NULL, return nul */
    1895             : 
    1896    14996460 :   ulong tree_off = bplus->root_off;
    1897    14996460 :   if( FD_UNLIKELY( (!tree_off) | (!query) ) ) { /* empty, optimize for big trees */
    1898         348 :     iter.leaf_off = 0UL;
    1899         348 :     iter.pair_idx = 0UL;
    1900         348 :     return iter;
    1901         348 :   }
    1902             : 
    1903             :   /* At this point, the bplus is not empty.  Find the leaf that might
    1904             :      contain query. */
    1905             : 
    1906    14996112 :   ulong leaf_lo = bplus->leaf_lo;
    1907    95611296 :   while( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* Optimize for big trees */
    1908    80615184 :     BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    1909    80615184 :     tree_off = node->tree_off[ BPLUS_(private_node_query)( node->pivot, node->tree_cnt, query ) ];
    1910    80615184 :   }
    1911    14996112 :   BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, tree_off );
    1912             : 
    1913             :   /* At this point, pairs in the previous leaf (if any) have keys less
    1914             :      than query and pairs in the next leaf (if any) have keys greater
    1915             :      than query.  Search the leaf for query. */
    1916             : 
    1917    14996112 :   BPLUS_PAIR_T const * pair     = leaf->pair;
    1918    14996112 :   ulong                pair_cnt = leaf->pair_cnt;
    1919             : 
    1920    14996112 :   ulong i0 = 0UL;
    1921    14996112 :   ulong i1 = pair_cnt;
    1922             : 
    1923    23399328 :   do {
    1924             : 
    1925             :     /* At this point, the range [i0,i1) contains at least 1 pair.  Pairs
    1926             :        [0,i0) have keys less than query, pairs [i1,pair_cnt) have keys
    1927             :        greater than query and we don't know about pairs [i0,i1).  Test
    1928             :        the pair in the middle.
    1929             : 
    1930             :        If this pair's key matches query, because all keys are unique, we
    1931             :        know that pair im is the first pair greater than or equal to
    1932             :        query and that pair im+1 is the first pair greater than query.
    1933             : 
    1934             :        If this pair's key is greater than query, we know all pairs in
    1935             :        [im,pair_cnt) are greater than query so we update i1 to im.
    1936             : 
    1937             :        If this pair's key is less than query, we know that all pairs in
    1938             :        [0,im+1) are less than query so we update i0 to im+1. */
    1939             : 
    1940    23399328 :     ulong im = (i0+i1) >> 1; /* No overflow */
    1941             : 
    1942    23399328 :     int cmp = BPLUS_(private_key_cmp)( &pair[im].BPLUS_PAIR_KEY, query );
    1943    23399328 :     if( FD_UNLIKELY( !cmp ) ) { /* optimize for big trees */
    1944             : 
    1945             :       /* At this point, pairs [0,im) have keys less than query, pair im
    1946             :          key matches query and pairs (im,pair_cnt) are greater than
    1947             :          query.  If:
    1948             : 
    1949             :            op==0 (GE): pick i0 == im   such that [0,i0) are <  query and [i0,pair_cnt) are >= query
    1950             :            op==1 (GT): pick i0 == im+1 such that [0,i0) are <= query and [i0,pair_cnt) are >  query
    1951             :            op==2 (LE): pick i0 == im+1 such that [0,i0) are <= query and [i0,pair_cnt) are >  query
    1952             :            op==3 (LT): pick i0 == im   such that [0,i0) are <  query and [i0,pair_cnt) are >= query */
    1953             : 
    1954     7494132 :       i0 = im + (ulong)((op==1) | (op==2)); /* compile time */
    1955     7494132 :       break;
    1956     7494132 :     }
    1957    15905196 :     i0 = fd_ulong_if( cmp>0, i0, im+1UL );
    1958    15905196 :     i1 = fd_ulong_if( cmp>0, im, i1     );
    1959             : 
    1960    15905196 :   } while( FD_LIKELY( i1-i0 ) ); /* optimize for big trees */
    1961             : 
    1962             :   /* At this point:
    1963             : 
    1964             :        op==0 (GE): pairs [i0,pair_cnt) have keys greater than or equal to query
    1965             :        op==1 (GT): pairs [i0,pair_cnt) have keys greater than             query
    1966             :        op==2 (LE): pairs [0,i0)        have keys less    than or equal to query
    1967             :        op==3 (LT): pairs [0,i0)        have keys less    than             query */
    1968             : 
    1969    14996112 :   if( op<=1 ) { /* compile time */
    1970             : 
    1971     7498056 :     if( FD_UNLIKELY( i0==pair_cnt ) ) { /* optimize for big trees */
    1972             : 
    1973             :       /* At this point:
    1974             : 
    1975             :            op==0 (GE): all pairs have keys less than             query and pairs in any next leaf have keys greater than query
    1976             :            op==1 (GT): all pairs have keys less than or equal to query and pairs in any next leaf have keys greater than query
    1977             : 
    1978             :          position iterator at first pair in next leaf (or nul if this is
    1979             :          the max leaf). */
    1980             : 
    1981     4491171 :       tree_off = leaf->next_off;
    1982     4491171 :       i0       = 0UL;
    1983     4491171 :     }
    1984             : 
    1985     7498056 :   } else {
    1986             : 
    1987     7498056 :     if( FD_UNLIKELY( i0==0UL ) ) { /* optimize for big trees */
    1988             : 
    1989             :       /* At this point:
    1990             : 
    1991             :            op==2 (LE): all pairs have keys greater than             query and pairs in any prev leaf have keys less than query
    1992             :            op==3 (LT): all pairs have keys greater than or equal to query and pairs in any prev leaf have keys less than query
    1993             : 
    1994             :          position iterator at last pair in previous leaf (or nul if this
    1995             :          is the min leaf). */
    1996             : 
    1997      742896 :       tree_off = leaf->prev_off;
    1998      742896 :       i0       = FD_UNLIKELY( !tree_off ) ? 1UL : BPLUS_(private_leaf_const)( bplus, tree_off )->pair_cnt;
    1999      742896 :     }
    2000     7498056 :     i0--;
    2001             : 
    2002     7498056 :   }
    2003             : 
    2004    14996112 :   iter.leaf_off = tree_off;
    2005    14996112 :   iter.pair_idx = i0;
    2006    14996112 :   return iter;
    2007    14996460 : }
    2008             : 
    2009             : int
    2010     1873254 : BPLUS_(verify)( BPLUS_(t) const * join ) {
    2011             : 
    2012 55111332036 : # define BPLUS_TEST(c) do {               \
    2013 52928495214 :     if( FD_UNLIKELY( !(c) ) ) {           \
    2014           0 :       FD_LOG_WARNING(( "FAIL: %s", #c )); \
    2015           0 :       return -1;                          \
    2016           0 :     }                                     \
    2017 52928495214 :   } while(0)
    2018             : 
    2019             :   /* Verify join */
    2020             : 
    2021     1873254 :   BPLUS_TEST( join );
    2022             : 
    2023     1873254 :   BPLUS_(private_t) const * bplus = BPLUS_(private_const)( join );
    2024             : 
    2025     1873254 :   BPLUS_TEST( fd_ulong_is_aligned( (ulong)bplus, BPLUS_ALIGN ) );
    2026             : 
    2027             :   /* Verify header */
    2028             : 
    2029     1873254 :   BPLUS_TEST( bplus->magic==BPLUS_MAGIC );
    2030             : 
    2031     1873254 :   ulong node_max = bplus->node_max;
    2032     1873254 :   ulong leaf_max = bplus->leaf_max;
    2033             : 
    2034     1873254 :   BPLUS_TEST( node_max<=BPLUS_(private_node_max_max)() );
    2035     1873254 :   BPLUS_TEST( leaf_max<=BPLUS_(private_leaf_max_max)() );
    2036             : 
    2037     1873254 :   ulong node_lo = bplus->node_lo;
    2038     1873254 :   ulong leaf_lo = bplus->leaf_lo;
    2039             : 
    2040     1873254 :   BPLUS_TEST( node_lo==fd_ulong_align_up(                    sizeof( BPLUS_(private_t)      ), BPLUS_NODE_ALIGN ) );
    2041     1873254 :   BPLUS_TEST( leaf_lo==fd_ulong_align_up( node_lo + node_max*sizeof( BPLUS_(private_node_t) ), BPLUS_LEAF_ALIGN ) );
    2042             : 
    2043     1873254 :   ulong node_hi = node_lo + node_max*sizeof( BPLUS_(private_node_t) );
    2044     1873254 :   ulong leaf_hi = leaf_lo + leaf_max*sizeof( BPLUS_(private_leaf_t) );
    2045             : 
    2046     1873254 :   ulong root_off     = bplus->root_off;
    2047     1873254 :   ulong leaf_min_off = bplus->leaf_min_off;
    2048     1873254 :   ulong leaf_max_off = bplus->leaf_max_off;
    2049             : 
    2050     1873254 :   if( FD_LIKELY( root_off ) ) {
    2051             : 
    2052     1873170 :     BPLUS_TEST( node_lo<=root_off ); BPLUS_TEST( root_off<leaf_hi  );
    2053     1873170 :     BPLUS_TEST( fd_ulong_is_aligned( root_off, fd_ulong_if( !BPLUS_(private_is_leaf)( root_off, leaf_lo ),
    2054     1873170 :                                                             BPLUS_NODE_ALIGN, BPLUS_LEAF_ALIGN ) ) );
    2055             : 
    2056     1873170 :     BPLUS_TEST( leaf_lo<=leaf_min_off ); BPLUS_TEST( leaf_min_off<leaf_hi  );
    2057     1873170 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_min_off, BPLUS_LEAF_ALIGN ) );
    2058             : 
    2059     1873170 :     BPLUS_TEST( leaf_lo<=leaf_max_off ); BPLUS_TEST( leaf_max_off<leaf_hi  );
    2060     1873170 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_max_off, BPLUS_LEAF_ALIGN ) );
    2061             : 
    2062     1873170 :   } else {
    2063             : 
    2064          84 :     BPLUS_TEST( !leaf_min_off );
    2065          84 :     BPLUS_TEST( !leaf_max_off );
    2066             : 
    2067          84 :   }
    2068             : 
    2069     1873254 :   ulong node_rem = bplus->node_max;
    2070     1873254 :   ulong leaf_rem = bplus->leaf_max;
    2071             : 
    2072             :   /* Verify node pool */
    2073             : 
    2074     1873254 :   ulong node_off = bplus->node_pool_off;
    2075  1336552995 :   while( FD_LIKELY( node_off ) ) {
    2076  1334679741 :     BPLUS_TEST( node_rem ); node_rem--;
    2077  1334679741 :     BPLUS_TEST( node_lo<=node_off ); BPLUS_TEST( node_off<node_hi  );
    2078  1334679741 :     BPLUS_TEST( fd_ulong_is_aligned( node_off, BPLUS_NODE_ALIGN ) );
    2079  1334679741 :     node_off = BPLUS_(private_node_const)( bplus, node_off )->tree_off[0];
    2080  1334679741 :   }
    2081             : 
    2082             :   /* Verify leaf pool */
    2083             : 
    2084     1873254 :   ulong leaf_off = bplus->leaf_pool_off;
    2085  2242739487 :   while( FD_LIKELY( leaf_off ) ) {
    2086  2240866233 :     BPLUS_TEST( leaf_rem ); leaf_rem--;
    2087  2240866233 :     BPLUS_TEST( leaf_lo<=leaf_off ); BPLUS_TEST( leaf_off<leaf_hi  );
    2088  2240866233 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_off, BPLUS_LEAF_ALIGN ) );
    2089  2240866233 :     leaf_off = BPLUS_(private_leaf_const)( bplus, leaf_off )->next_off;
    2090  2240866233 :   }
    2091             : 
    2092             :   /* Verify the actual tree */
    2093             : 
    2094     1873254 :   ulong leaf_cnt = leaf_rem;
    2095             : 
    2096     1873254 :   if( FD_LIKELY( root_off ) ) { /* optimize for big trees */
    2097             : 
    2098             :     /* At this point, the tree is not empty */
    2099             : 
    2100     1873170 :     ulong tree_min = BPLUS_(private_tree_min)();
    2101     1873170 :     ulong tree_max = BPLUS_(private_tree_max)();
    2102             : 
    2103     1873170 :     ulong pair_min = BPLUS_(private_pair_min)();
    2104     1873170 :     ulong pair_max = BPLUS_(private_pair_max)();
    2105             : 
    2106     1873170 :     ulong               stack_tree_off   [ 128 ];
    2107     1873170 :     ulong               stack_subtree_idx[ 128 ];
    2108     1873170 :     BPLUS_KEY_T const * stack_key_lo     [ 128 ];
    2109     1873170 :     BPLUS_KEY_T const * stack_key_hi     [ 128 ];
    2110     1873170 :     ulong               stack_cnt = 0UL;
    2111     1873170 :     ulong               stack_max = 128UL;
    2112             : 
    2113     1873170 :     ulong               tree_off    = root_off;
    2114     1873170 :     ulong               subtree_idx = 0UL;
    2115     1873170 :     BPLUS_KEY_T const * key_lo      = NULL;
    2116     1873170 :     BPLUS_KEY_T const * key_hi      = NULL;
    2117             : 
    2118  3776521611 :     for(;;) {
    2119             : 
    2120             :       /* At this point, we are still validating the tree rooted at
    2121             :          tree_off and this tree should contain only keys in
    2122             :          [key_lo,key_hi).  key_{lo,hi}==NULL indicates key_{lo,hi} is
    2123             :          {-inf,+inf}.
    2124             : 
    2125             :          If tree is a node, we've validated all of tree's subtrees
    2126             :          [0,subtree_idx).  subtree_idx==0 indicates this is the first
    2127             :          time we've visited this node.
    2128             : 
    2129             :          If tree is a leaf, as we only visit each leaf exactly once,
    2130             :          subtree_idx will be zero (and otherwise ignored). */
    2131             : 
    2132  3776521611 :       if( FD_LIKELY( !BPLUS_(private_is_leaf)( tree_off, leaf_lo ) ) ) { /* tree is a node */
    2133             : 
    2134             :         /* If this is the first time visiting this node, validate it */
    2135             : 
    2136  2180963652 :         if( FD_UNLIKELY( !subtree_idx ) ) {
    2137             : 
    2138             :           /* Validate no loops */
    2139             : 
    2140   587278863 :           BPLUS_TEST( node_rem ); node_rem--;
    2141             : 
    2142             :           /* Validate the node pointer */
    2143             : 
    2144   587278863 :           BPLUS_TEST( node_lo<=tree_off ); BPLUS_TEST( tree_off<node_hi );
    2145   587278863 :           BPLUS_TEST( fd_ulong_is_aligned( tree_off, BPLUS_NODE_ALIGN ) );
    2146             : 
    2147   587278863 :           BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    2148             : 
    2149   587278863 :           BPLUS_KEY_T const * subtree_pivot = node->pivot;
    2150   587278863 :           ulong       const * subtree_off   = node->tree_off;
    2151   587278863 :           ulong               subtree_cnt   = node->tree_cnt;
    2152             : 
    2153             :           /* Validate the node tree count */
    2154             : 
    2155   587278863 :           BPLUS_TEST( fd_ulong_if( tree_off!=root_off, tree_min, 2UL )<=subtree_cnt );
    2156   587278863 :           BPLUS_TEST( subtree_cnt<=tree_max );
    2157             : 
    2158             :           /* Validate the node tree offsets */
    2159             : 
    2160   587278863 :           int is_leaf = BPLUS_(private_is_leaf)( subtree_off[0], leaf_lo );
    2161             : 
    2162   587278863 :           ulong lo    = fd_ulong_if( is_leaf, leaf_lo,          node_lo          );
    2163   587278863 :           ulong hi    = fd_ulong_if( is_leaf, leaf_hi,          node_hi          );
    2164   587278863 :           ulong align = fd_ulong_if( is_leaf, BPLUS_LEAF_ALIGN, BPLUS_NODE_ALIGN );
    2165             : 
    2166  2768242515 :           for( ulong idx=0UL; idx<subtree_cnt; idx++ ) {
    2167  2180963652 :             ulong off = subtree_off[ idx ];
    2168  2180963652 :             BPLUS_TEST( lo<=off ); BPLUS_TEST( off<hi );
    2169  2180963652 :             BPLUS_TEST( fd_ulong_is_aligned( off, align ) );
    2170  2180963652 :           }
    2171             : 
    2172             :           /* Validate the node pivots */
    2173             : 
    2174   587278863 :           if( FD_LIKELY( key_lo ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( key_lo, &subtree_pivot[0] )<0 );
    2175             : 
    2176  1593684789 :           for( ulong idx=1UL; idx<subtree_cnt-1UL; idx++ )
    2177  1006405926 :             BPLUS_TEST( BPLUS_(private_key_cmp)( &subtree_pivot[idx-1UL], &subtree_pivot[idx] )<0 );
    2178             : 
    2179   587278863 :           if( FD_LIKELY( key_hi ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( &subtree_pivot[subtree_cnt-2UL], key_hi )<0 );
    2180   587278863 :         }
    2181             : 
    2182             :         /* At this point, tree_off is a bplus global offset of a
    2183             :            verified node (verified either just now or on a previous
    2184             :            iteration).  If subtree_idx isn't the last subtree, push
    2185             :            subtree_idx+1 onto the stack for a later iteration. */
    2186             : 
    2187  2180963652 :         BPLUS_(private_node_t) const * node = BPLUS_(private_node_const)( bplus, tree_off );
    2188             : 
    2189  2180963652 :         BPLUS_KEY_T const * subtree_pivot = node->pivot;
    2190  2180963652 :         ulong       const * subtree_off   = node->tree_off;
    2191  2180963652 :         ulong               subtree_cnt   = node->tree_cnt;
    2192             : 
    2193  2180963652 :         if( FD_LIKELY( (subtree_idx+1UL)<subtree_cnt ) ) {
    2194  1593684789 :           BPLUS_TEST( stack_cnt<stack_max );
    2195  1593684789 :           stack_tree_off   [ stack_cnt ] = tree_off;
    2196  1593684789 :           stack_subtree_idx[ stack_cnt ] = subtree_idx+1UL;
    2197  1593684789 :           stack_key_lo     [ stack_cnt ] = key_lo;
    2198  1593684789 :           stack_key_hi     [ stack_cnt ] = key_hi;
    2199  1593684789 :           stack_cnt++;
    2200  1593684789 :         }
    2201             : 
    2202             :         /* And recurse into subtree_idx for the next iteration.  Note
    2203             :            this node's key_lo is subtree_idx 0's key_lo and this node's
    2204             :            key_hi is subtree_idx tree_cnt-1's key_hi. */
    2205             : 
    2206  2180963652 :         /**/                                           tree_off =  subtree_off  [ subtree_idx     ];
    2207  2180963652 :         if( FD_LIKELY( subtree_idx>0UL             ) ) key_lo   = &subtree_pivot[ subtree_idx-1UL ];
    2208  2180963652 :         if( FD_LIKELY( subtree_idx<subtree_cnt-1UL ) ) key_hi   = &subtree_pivot[ subtree_idx     ];
    2209  2180963652 :         subtree_idx = 0UL;
    2210  2180963652 :         continue;
    2211  2180963652 :       }
    2212             : 
    2213             :       /* At this point, tree is a leaf.  Validate no loops. */
    2214             : 
    2215  1595557959 :       BPLUS_TEST( leaf_rem ); leaf_rem--;
    2216             : 
    2217             :       /* Validate the leaf pointer */
    2218             : 
    2219  1595557959 :       BPLUS_TEST( leaf_lo<=tree_off ); BPLUS_TEST( tree_off<leaf_hi );
    2220  1595557959 :       BPLUS_TEST( fd_ulong_is_aligned( tree_off, BPLUS_LEAF_ALIGN ) );
    2221             : 
    2222  1595557959 :       BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, tree_off );
    2223             : 
    2224  1595557959 :       BPLUS_PAIR_T const * pair     = leaf->pair;
    2225  1595557959 :       ulong                pair_cnt = leaf->pair_cnt;
    2226             : 
    2227             :       /* Validate the leaf pair count */
    2228             : 
    2229  1595557959 :       BPLUS_TEST( fd_ulong_if( tree_off!=root_off, pair_min, 1UL )<=pair_cnt );
    2230  1595557959 :       BPLUS_TEST( pair_cnt<=pair_max );
    2231             : 
    2232             :       /* Validate the leaf pairs */
    2233             : 
    2234  1595557959 :       if( FD_LIKELY( key_lo ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( key_lo, &pair[0].BPLUS_PAIR_KEY )<=0 );
    2235             : 
    2236  4031130432 :       for( ulong idx=1UL; idx<pair_cnt; idx++ )
    2237  2435572473 :         BPLUS_TEST( BPLUS_(private_key_cmp)( &pair[idx-1UL].BPLUS_PAIR_KEY, &pair[idx].BPLUS_PAIR_KEY )<0 );
    2238             : 
    2239  1595557959 :       if( FD_LIKELY( key_hi ) ) BPLUS_TEST( BPLUS_(private_key_cmp)( &pair[ pair_cnt-1UL ].BPLUS_PAIR_KEY, key_hi )<0 );
    2240             : 
    2241             :       /* (Note that we validate the leaf ordered iterator below.) */
    2242             : 
    2243             :       /* If no more work to do, abort.  Otherwise, get the next node to
    2244             :          process. */
    2245             : 
    2246  1595557959 :       if( FD_UNLIKELY( !stack_cnt ) ) break;
    2247  1593684789 :       stack_cnt--;
    2248  1593684789 :       tree_off    = stack_tree_off   [ stack_cnt ];
    2249  1593684789 :       subtree_idx = stack_subtree_idx[ stack_cnt ];
    2250  1593684789 :       key_lo      = stack_key_lo     [ stack_cnt ];
    2251  1593684789 :       key_hi      = stack_key_hi     [ stack_cnt ];
    2252  1593684789 :     }
    2253     1873170 :   }
    2254             : 
    2255             :   /* Validate all nodes and leaves touched */
    2256             : 
    2257     1873254 :   BPLUS_TEST( !node_rem );
    2258     1873254 :   BPLUS_TEST( !leaf_rem );
    2259             : 
    2260             :   /* Validate leaf iteration */
    2261             : 
    2262     1873254 :   leaf_rem = leaf_cnt;
    2263             : 
    2264     1873254 :   ulong leaf_prev_off = 0UL;
    2265     1873254 :   /**/  leaf_off      = bplus->leaf_min_off;
    2266  1597431213 :   while( leaf_off ) { /* Validates leaf->next_off for last iteration */
    2267             : 
    2268             :     /* Validate no loops */
    2269             : 
    2270  1595557959 :     BPLUS_TEST( leaf_rem ); leaf_rem--;
    2271             : 
    2272             :     /* Validate forward iteration (validates bplus->leaf_min_off first
    2273             :        iteration, validates leaf->next_off interior iterations) */
    2274             : 
    2275  1595557959 :     BPLUS_TEST( leaf_lo<=leaf_off ); BPLUS_TEST( leaf_off<leaf_hi );
    2276  1595557959 :     BPLUS_TEST( fd_ulong_is_aligned( leaf_off, BPLUS_LEAF_ALIGN ) );
    2277  1595557959 :     BPLUS_(private_leaf_t) const * leaf = BPLUS_(private_leaf_const)( bplus, leaf_off );
    2278             : 
    2279             :     /* Validate reverse iteration (validates leaf->prev_off,
    2280             :        bplus->leaf_max_off validated below) */
    2281             : 
    2282  1595557959 :     BPLUS_TEST( leaf->prev_off==leaf_prev_off );
    2283             : 
    2284             :     /* Validate ordered leaves */
    2285             : 
    2286  1595557959 :     if( FD_LIKELY( leaf_prev_off ) ) {
    2287  1593684789 :       BPLUS_(private_leaf_t) const * prev = BPLUS_(private_leaf_const)( bplus, leaf_prev_off );
    2288  1593684789 :       BPLUS_TEST( BPLUS_(private_key_cmp)( &prev->pair[ prev->pair_cnt-1UL ].BPLUS_PAIR_KEY, &leaf->pair[ 0 ].BPLUS_PAIR_KEY )<0 );
    2289  1593684789 :     }
    2290             : 
    2291  1595557959 :     leaf_prev_off = leaf_off;
    2292  1595557959 :     leaf_off      = leaf->next_off;
    2293  1595557959 :   }
    2294             : 
    2295     1873254 :   BPLUS_TEST( bplus->leaf_max_off==leaf_prev_off ); /* Validates bplus->leaf_max_off */
    2296     1873254 :   BPLUS_TEST( !leaf_rem );                          /* All leaves in tree covered */
    2297             : 
    2298     1873254 : # undef BPLUS_TEST
    2299             : 
    2300     1873254 :   return 0;
    2301     1873254 : }
    2302             : 
    2303             : #endif
    2304             : 
    2305             : #undef BPLUS_STATIC
    2306             : #undef BPLUS_
    2307             : 
    2308             : #undef BPLUS_IMPL_STYLE
    2309             : #undef BPLUS_MAGIC
    2310             : #undef BPLUS_LEAF_ALIGN
    2311             : #undef BPLUS_NODE_ALIGN
    2312             : #undef BPLUS_ALIGN
    2313             : #undef BPLUS_TREE_MAX
    2314             : #undef BPLUS_NODE_MAX
    2315             : #undef BPLUS_PAIR_T
    2316             : #undef BPLUS_KEY_T
    2317             : #undef BPLUS_NAME

Generated by: LCOV version 1.14