LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_private.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 67 76 88.2 %
Date: 2025-03-20 12:08:36 Functions: 58 2556 2.3 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_wksp_fd_wksp_private_h
       2             : #define HEADER_fd_src_util_wksp_fd_wksp_private_h
       3             : 
       4             : #include "fd_wksp.h"
       5             : 
       6             : /* If FD_WKSP_LOCK_RECLAIM==0, do not try to recover the lock from
       7             :    dead processes.  This is useful, for example, if we know that the
       8             :    lock will not get acquired by another process, or that if another
       9             :    acquiring process dies that all potential users will get exited.  It
      10             :    prevents a syscall on various common workspace paths (eg, alloc). */
      11             : 
      12             : #ifndef FD_WKSP_LOCK_RECLAIM
      13             : #define FD_WKSP_LOCK_RECLAIM 0
      14             : #endif
      15             : 
      16             : /* FD_WKSP_PRIVATE_PINFO_IDX_NULL is the pinfo index value used to
      17             :    indicate NULL */
      18             : 
      19 25892195931 : #define FD_WKSP_PRIVATE_PINFO_IDX_NULL ((ulong)UINT_MAX)
      20             : 
      21             : /* A fd_wksp_private_pinfo_t specifies details about a partition in a
      22             :    workspace and its relationship to other partitions in that workspace.
      23             : 
      24             :    FD_WKSP_PRIVATE_PINFO_ALIGN is an integer power of 2 and
      25             :    FD_WKSP_PRIVATE_PINFO_FOOTPRINT will be a multiple of align.
      26             : 
      27             :    If a partition is not idle:
      28             : 
      29             :    - [gaddr_lo,gaddr_hi) specify the range of offsets covered by this
      30             :      partition.
      31             : 
      32             :        wksp_gaddr_lo <= gaddr_lo < gaddr_hi <= wksp_gaddr_hi
      33             : 
      34             :      such that partitions always have at least 1 byte and are contained
      35             :      within the workspace's data region.
      36             : 
      37             :    - tag==0 indicates this partition is space in the workspace free
      38             :      for use and the partition is in the free treap, fast findable by
      39             :      its size.  Otherwise, this partition is allocated and the partition
      40             :      is in the used treap, fast O(1) findable by any in range gaddr.
      41             : 
      42             :    - prev_cidx, next_cidx give the index (in compressed form) of the
      43             :      previous / next partition if present or IDX_NULL if this is the
      44             :      workspace partition head / tail partition (in which case gaddr_lo /
      45             :      gaddr_hi will be wksp->gaddr_lo / wksp->gaddr_hi).  That is, for
      46             :      partition idx where idx is in [0,wksp->part_max):
      47             : 
      48             :        ulong prev_idx = fd_wksp_private_pinfo_idx( pinfo[ idx ].prev_cidx );
      49             :        if( fd_wksp_private_pinfo_idx_is_null( prev_idx ) ) {
      50             :          ... at this point:
      51             :          ...   idx == fd_wksp_private_pinfo_idx( wksp->part_head_cidx ) );
      52             :          ...   pinfo[ idx ].gaddr_lo == wksp->gaddr_lo );
      53             :        } else {
      54             :          ... at this point:
      55             :          ...   idx ==_fd_wksp_private_pinfo_idx( pinfo[ prev_idx ].next_cidx );
      56             :          ...   pinfo[ idx ].gaddr_lo == pinfo[ prev_idx ].gaddr_hi;
      57             :        }
      58             : 
      59             :      and:
      60             : 
      61             :        ulong next_idx = fd_wksp_private_pinfo_idx( pinfo[ idx ].next_cidx );
      62             :        if( fd_wksp_private_pinfo_idx_is_null( next_idx ) ) {
      63             :          ... at this point:
      64             :          ...   idx == fd_wksp_private_pinfo_idx( wksp->part_tail_cidx ) );
      65             :          ...   pinfo[ idx ].gaddr_hi == wksp->gaddr_hi );
      66             :        } else {
      67             :          ... at this point:
      68             :          ...   idx ==_fd_wksp_private_pinfo_idx( pinfo[ next_idx ].prev_cidx );
      69             :          ...   pinfo[ idx ].gaddr_hi == pinfo[ next_idx ].gaddr_lo;
      70             :        }
      71             : 
      72             :    - If partition idx is in the used treap, {left,right}_cidx specify
      73             :      the partition indices of the root of the left and right subtrees.
      74             : 
      75             :      The used treap obeys the binary search tree property that all
      76             :      partitions in the left/right subtree (if any) cover a range of
      77             :      offsets strictly lower/higher than range covered by idx.
      78             : 
      79             :      parent_cidx specifies idx's parent tree (if any).  If idx is the
      80             :      wksp used tree root, parent_cidx will specify IDX_NULL and
      81             :      wksp->part_used_cidx will specify idx.
      82             : 
      83             :      The used treap also obeys the heap property to make it well
      84             :      balanced on average.  Specifically, the idx's parent's heap
      85             :      priority will be at least idx's heap priority.
      86             : 
      87             :      in_same will be 0 and same_cidx will specify IDX_NULL as no used
      88             :      partitions can overlap.
      89             : 
      90             :    - If partition idx is in the free treap, if partition idx is not
      91             :      in a list of same sized partitions, in_same will be 0 and
      92             :      {left,right}_cidx specify the partition indices of the root of the
      93             :      left and right subtrees.
      94             : 
      95             :      The free treap obeys the binary search tree property that all
      96             :      partitions in the left/right subtree (if any) have partition sizes
      97             :      strictly lower/higher than partition idx's size.
      98             : 
      99             :      parent_cidx specifies idx's parent tree (if any).  If idx is the
     100             :      wksp free tree root, parent_cidx will specify IDX_NULL and
     101             :      wksp->part_free_cidx will specify idx.
     102             : 
     103             :      The free treap also obeys the heap property to make it well
     104             :      balanced on average.  Specifically, the idx's parent's heap
     105             :      priority will be at least idx's heap priority.
     106             : 
     107             :      If there are additional partitions of the same size to partition
     108             :      idx, same_cidx will refer to the next partition of the same size.
     109             : 
     110             :      If partition idx is in a list of same sized partitions, in_same
     111             :      will be 1 and parent_cidx / same_cidx will specify the prev / next
     112             :      index of additional partitions of the same size.  same_cidx will
     113             :      specify IDX_NULL if no more.
     114             : 
     115             :    - heap_prio is a random value used as described above.
     116             : 
     117             :    - stack_cidx and cycle_tag are for internal use */
     118             : 
     119             : /* TODO: Consider align 32/ footprint 96 without compressed indices if
     120             :    ever needing more than ~4B partitions. */
     121             : 
     122             : #define FD_WKSP_PRIVATE_PINFO_ALIGN     (64UL) /* At most FD_WKSP_ALIGN */
     123             : #define FD_WKSP_PRIVATE_PINFO_FOOTPRINT (64UL)
     124             : 
     125             : struct __attribute__((aligned(FD_WKSP_PRIVATE_PINFO_ALIGN))) fd_wksp_private_pinfo {
     126             :   ulong gaddr_lo;       /* If in idle stack, 0 */
     127             :   ulong gaddr_hi;       /* ",                0 */
     128             :   ulong tag;            /* ",                0 */
     129             :   uint  heap_prio : 31; /* 30 bit priority and 1 bit free to use for infinite priority bulk tree ops */
     130             :   uint  in_same   :  1; /* 1 if in a same list and 0 otherwise */
     131             :   uint  prev_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     132             :   uint  next_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     133             :   uint  left_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     134             :   uint  right_cidx;     /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     135             :   uint  parent_cidx;    /* ",                cidx of next idle or fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) if no more */
     136             :   uint  same_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     137             :   uint  stack_cidx;     /* internal use */
     138             :   ulong cycle_tag;      /* internal use */
     139             : };
     140             : 
     141             : typedef struct fd_wksp_private_pinfo fd_wksp_private_pinfo_t;
     142             : 
     143             : /* FD_WKSP_MAGIC is an ideally unique number that specifies the precise
     144             :    memory layout of a fd_wksp. */
     145             : 
     146         108 : #define FD_WKSP_MAGIC (0xF17EDA2C3731C591UL) /* F17E=FIRE,DA2C/3R<>DANCER,31/C59<>WKSP,0<>0 --> FIRE DANCER WKSP VERSION 1 */
     147             : 
     148             : /* fd_wksp_private specifies the detailed layout of the internals of a
     149             :    fd_wksp_t */
     150             : 
     151             : struct fd_wksp_private {
     152             : 
     153             :   /* This point is FD_WKSP_ALIGN aligned */
     154             : 
     155             :   /* This fields are static and mostly in the first cache line */
     156             : 
     157             :   ulong magic;                     /* ==FD_WKSP_MAGIC */
     158             :   ulong part_max;                  /* Max wksp partitions */
     159             :   ulong data_max;                  /* Data region */
     160             :   ulong gaddr_lo;                  /* ==fd_wksp_private_data_off( part_max ), data region covers offsets [gaddr_lo,gaddr_hi) */
     161             :   ulong gaddr_hi;                  /* ==gaddr_lo + data_max,                  offset gaddr_hi is to 1 byte footer */
     162             :   char  name[ FD_SHMEM_NAME_MAX ]; /* (Convenience) backing fd_shmem region cstr name */
     163             :   uint  seed;                      /* Heap priority random number seed, arbitrary */
     164             : 
     165             :   /* These fields are dynamic and in the adjacent cache line */
     166             : 
     167             :   uint  idle_top_cidx;             /* Stack of partition infos not in use, parent_idx is next pointer */
     168             :   uint  part_head_cidx;            /* Index for info about the leftmost partition */
     169             :   uint  part_tail_cidx;            /* Index for info about the rightmost partition */
     170             :   uint  part_used_cidx;            /* Treap of partitions that are currently used (tag!=0), searchable by gaddr */
     171             :   uint  part_free_cidx;            /* Treap of partitions that are currently free (tag==0), searchable by size */
     172             :   ulong cycle_tag;                 /* Used for cycle detection */
     173             :   ulong owner;                     /* thread group id of the owner or NULL otherwise */
     174             : 
     175             :   /* IMPORTANT!  The "single-source-of-truth" for what is currently
     176             :      used (and its tags) is the set of non-zero tagged partitions in the
     177             :      partition info array.  The idle stack, partition list, used treap
     178             :      and free treap are auxiliary data structuring that can be
     179             :      reconstructed at any time from this single source of truth.
     180             : 
     181             :      Conversely, if there accidental or deliberate data corruption of
     182             :      the wksp metadata resulting in a conflict between what is stored
     183             :      in the partition info array and the auxiliary data structures,
     184             :      the partition info array governs. */
     185             : 
     186             :   /* Padding to FD_WKSP_PRIVATE_PINFO_ALIGN here */
     187             : 
     188             :   /* part_max pinfo here */
     189             :   /* data_max byte data region here */
     190             :   /* 1 footer byte here */
     191             :   /* Padding to FD_WKSP_ALIGN here */
     192             : };
     193             : 
     194             : FD_PROTOTYPES_BEGIN
     195             : 
     196             : /* fd_wksp_private_pinfo_sz returns the size of a partition in bytes.
     197             :    Assumes pinfo points to the pinfo of a partition in a current local
     198             :    join.  Will be positive. */
     199             : 
     200             : FD_FN_PURE static inline ulong
     201  7126867671 : fd_wksp_private_pinfo_sz( fd_wksp_private_pinfo_t const * pinfo ) {
     202  7126867671 :   return pinfo->gaddr_hi - pinfo->gaddr_lo;
     203  7126867671 : }
     204             : 
     205             : /* fd_wksp_private_{part,data}_off return the wksp offset of the
     206             :    pinfo array and the data region.  data_off assumes part_max is a
     207             :    value that will not overflow. */
     208             : 
     209             : FD_FN_CONST static inline ulong
     210           0 : fd_wksp_private_pinfo_off( void ) {
     211           0 :   return 128UL; /* fd_ulong_align_up( sizeof(fd_wksp_t), FD_WKSP_PRIVATE_PINFO_ALIGN ); */
     212           0 : }
     213             : 
     214             : FD_FN_CONST static inline ulong
     215        2856 : fd_wksp_private_data_off( ulong part_max ) {
     216        2856 :   return fd_wksp_private_pinfo_off() + part_max*sizeof(fd_wksp_private_pinfo_t);
     217        2856 : }
     218             : 
     219             : /* fd_wksp_private_pinfo returns the location of wksp pinfo array in the
     220             :    caller's address space.  Assumes wksp is a current local join.
     221             :    fd_wksp_private_pinfo_const is a const-correct version. */
     222             : 
     223             : FD_FN_CONST static inline fd_wksp_private_pinfo_t *
     224   545653362 : fd_wksp_private_pinfo( fd_wksp_t * wksp ) {
     225   545653362 :   return (fd_wksp_private_pinfo_t *)(((ulong)wksp) + fd_wksp_private_pinfo_off());
     226   545653362 : }
     227             : 
     228             : FD_FN_CONST static inline fd_wksp_private_pinfo_t const *
     229           0 : fd_wksp_private_pinfo_const( fd_wksp_t const * wksp ) {
     230           0 :   return (fd_wksp_private_pinfo_t const *)(((ulong)wksp) + fd_wksp_private_pinfo_off());
     231           0 : }
     232             : 
     233             : /* fd_wksp_private_pinfo_{cidx,idx} compresses / uncompresses a pinfo index */
     234             : 
     235  8340952356 : static inline uint  fd_wksp_private_pinfo_cidx( ulong idx  ) { return (uint) idx;  }
     236 17130567966 : static inline ulong fd_wksp_private_pinfo_idx ( uint  cidx ) { return (ulong)cidx; }
     237             : 
     238             : /* fd_wksp_private_pinfo_idx_is_null returns 1 if idx is
     239             :    FD_WKSP_PRIVATE_PINFO_IDX_NULL and 0 otherwise */
     240             : 
     241 22059570471 : static inline int fd_wksp_private_pinfo_idx_is_null( ulong idx ) { return idx==FD_WKSP_PRIVATE_PINFO_IDX_NULL; }
     242             : 
     243             : /* pinfo idle stack APIs **********************************************/
     244             : 
     245             : /* fd_wksp_private_idle_stack_is_empty returns 1 if there are no idle
     246             :    partitions and 0 otherwise.  Also returns 1 if corruption is
     247             :    detected.  Assumes wksp is a current local join. */
     248             : 
     249             : static inline int
     250        7023 : fd_wksp_private_idle_stack_is_empty( fd_wksp_t * wksp ) {
     251        7023 :   return fd_wksp_private_pinfo_idx( wksp->idle_top_cidx ) >= wksp->part_max;
     252        7023 : }
     253             : 
     254             : /* fd_wksp_private_idle_stack_pop pops an idle partition off wksp's idle
     255             :    stack.  Assumes the caller knows idle stack is not empty.  The caller
     256             :    is promised that the popped partition has [gaddr_lo,gaddr_hi) = [0,0)
     257             :    tag 0, {prev, next, left, right, same, parent}_cidx specify IDX_NULL.
     258             :    Further, heap_prio should have been assigned a random value.
     259             :    stack_idx and cycle_tag are for internal use. */
     260             : 
     261             : static inline ulong                                                 /* Assumes in [0,part_max) */
     262             : fd_wksp_private_idle_stack_pop( fd_wksp_t *               wksp,     /* Assumes current local join */
     263   113798706 :                                 fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     264   113798706 :   ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
     265   113798706 :   wksp->idle_top_cidx = pinfo[ i ].parent_cidx;
     266   113798706 :   pinfo[ i ].parent_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     267   113798706 :   return i;
     268   113798706 : }
     269             : 
     270             : /* fd_wksp_private_idle_stack_push pushes partition i onto the idle
     271             :    stack.  Assumes the caller knows i is not currently in the idle
     272             :    stack, partitioning, used treap or free treap. */
     273             : 
     274             : static inline void
     275             : fd_wksp_private_idle_stack_push( ulong                     i,        /* Assumes in [0,part_max) */
     276             :                                  fd_wksp_t *               wksp,     /* Assumes current local join */
     277   118694886 :                                  fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     278   118694886 :   pinfo[ i ].gaddr_lo    = 0UL;
     279   118694886 :   pinfo[ i ].gaddr_hi    = 0UL;
     280   118694886 :   pinfo[ i ].tag         = 0U;
     281   118694886 :   pinfo[ i ].in_same     = 0U;
     282   118694886 :   pinfo[ i ].prev_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     283   118694886 :   pinfo[ i ].next_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     284   118694886 :   pinfo[ i ].left_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     285   118694886 :   pinfo[ i ].right_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     286   118694886 :   pinfo[ i ].same_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     287   118694886 :   pinfo[ i ].parent_cidx = wksp->idle_top_cidx;
     288   118694886 :   wksp->idle_top_cidx = fd_wksp_private_pinfo_cidx( i );
     289   118694886 : }
     290             : 
     291             : /* pinfo used treap APIs **********************************************/
     292             : 
     293             : /* fd_wksp_private_used_treap_query queries wksp's used treap for the
     294             :    used partition that holds gaddr.  On success, returns the requested
     295             :    partition idx, in [0,part_max), and, on failure, returns IDX_NULL.
     296             :    Reasons for failure include gaddr is not in a used partition and
     297             :    internal treap corruption detected.  Might consume a wksp cycle tag
     298             :    and clobber partition cycle tags.  Reasonably fast O(lg N) where N is
     299             :    the number of used partitions. */
     300             : 
     301             : ulong
     302             : fd_wksp_private_used_treap_query( ulong                     gaddr,
     303             :                                   fd_wksp_t *               wksp,
     304             :                                   fd_wksp_private_pinfo_t * pinfo );
     305             : 
     306             : /* fd_wksp_private_used_treap_insert inserts partition n into wksp's
     307             :    used treap.  Assumes n is not in the idle stack, used treap or free
     308             :    treap.  Does not care if n is in the partitioning or not.  Reasonably
     309             :    fast O(lg N) where N is the number of used partitions.
     310             : 
     311             :    Partition n should have [gaddr_lo,gaddr_hi) and heap_prio initialized
     312             :    on entry (heap_prio should be a random value).  tag need not be
     313             :    initialized but it is assumed that the caller will set the tag to its
     314             :    final value on success to make the partition officially used.  This
     315             :    will initialize {in_same, left, right, same, parent}_cidx.  This will
     316             :    ignore {prev,next}_cidx.  This might consume a wksp cycle tag and
     317             :    clobber partition stack_cidx and cycle_tag fields.
     318             : 
     319             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     320             :    (negative) on failure (logs details for failure).  Reasons for
     321             :    failure include n is not in [0,part_max), n's range is not in wksp
     322             :    data region, n was detected as already inserted (this detection is
     323             :    not guaranteed), treap internal connectivity issues were detected
     324             :    (complete detection not guaranteed), and n overlaps with at least one
     325             :    element already inserted into the treap.
     326             : 
     327             :    On failure n and the treap itself were not modified (except possibly
     328             :    clobbering of stack_cidx and cycle_tag).  Note that failure reasons
     329             :    are either user error or memory corruption.  This cannot fail in
     330             :    normal operating circumstances. */
     331             : 
     332             : int
     333             : fd_wksp_private_used_treap_insert( ulong                     n,
     334             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     335             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     336             : 
     337             : /* fd_wksp_private_used_treap_remove removes partition d from wksp's
     338             :    used treap.  Assumes d in the used treap, not in the free treap, not
     339             :    in the idle stack.  Does not care if d is in the partitioning.
     340             :    Reasonably fast O(lg N) where N is the number of used partitions.
     341             :    This might consume a wksp cycle tag and clobber partition stack_cidx
     342             :    and cycle_tag fields.
     343             : 
     344             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     345             :    (negative) on failure (logs details for failure).  Reasons for
     346             :    failure include d is not in [0,part_max) and treap internal
     347             :    connectivity issues were detected (complete detection not
     348             :    guaranteed).
     349             : 
     350             :    Note that failure reasons are either user error or memory corruption.
     351             :    This cannot fail in normal operating circumstances. */
     352             : 
     353             : int
     354             : fd_wksp_private_used_treap_remove( ulong                     d,
     355             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     356             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     357             : 
     358             : /* pinfo free treap APIs **********************************************/
     359             : 
     360             : /* fd_wksp_private_free_treap_query queries wksp's free treap for the
     361             :    smallest partition of at least sz.  On success, returns the index of
     362             :    a partition in the free treap suitable for sz, in [0,part_max), and,
     363             :    on failure, returns IDX_NULL.  Reasons for failure include sz zero,
     364             :    sz is larger than any free partition, and internal treap corruption
     365             :    was detected.  Might consume a wksp cycle tag and clobber partition
     366             :    cycle tags.  Reasonably fast O(lg N) where N is the number of used
     367             :    partitions. */
     368             : 
     369             : ulong
     370             : fd_wksp_private_free_treap_query( ulong                     sz,
     371             :                                   fd_wksp_t *               wksp,    /* Assumes current local join */
     372             :                                   fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     373             : 
     374             : /* fd_wksp_private_free_treap_insert inserts partition n into wksp's
     375             :    free treap.  Assumes n is not in the idle stack, used treap or free
     376             :    treap.  Does not care if n is in the partitioning or not.  Reasonably
     377             :    fast O(lg N) where N is the number of partitions in the free treap.
     378             : 
     379             :    Partition n should have [gaddr_lo,gaddr_hi) and heap_prio initialized
     380             :    on entry (heap_prio should be a random value).  tag need not be
     381             :    initialized but it is assumed that the caller will zero the tag
     382             :    beforehand to make the partition officially free.  This will
     383             :    initialize {in_same, left, right, same, parent}_cidx.  This will
     384             :    ignore {prev,next}_cidx.  This might consume a wksp cycle tag and
     385             :    clobber the partition stack_cidx and cycle_tag fields.
     386             : 
     387             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     388             :    (negative) on failure (logs details for failure).  Reasons for
     389             :    failure include n is not in [0,part_max), n's range is not in wksp
     390             :    data region, n's tag is not zero, n was detected as already inserted
     391             :    (this detection is not guaranteed), treap internal connectivity
     392             :    issues were detected (complete detection not guaranteed).
     393             : 
     394             :    If n's size exactly matches the size of partition already in the
     395             :    treap, n will be pushed onto that partition's same stack rather than
     396             :    inserted into the treap.
     397             : 
     398             :    On failure n and the treap itself were not modified (except possibly
     399             :    clobbering of stack_cidx and cycle_tag).  Note that failures reasons
     400             :    are either user error or memory corruption.  This has no failures in
     401             :    normal operating circumstances. */
     402             : 
     403             : int
     404             : fd_wksp_private_free_treap_insert( ulong                     n,
     405             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     406             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     407             : 
     408             : /* fd_wksp_private_free_treap_same_is_empty returns 1 if the same list
     409             :    for d is empty and 0 if not.  Returns 1 if corruption in detected.
     410             :    Assumes d is in the free treap. */
     411             : 
     412             : static inline int
     413             : fd_wksp_private_free_treap_same_is_empty( ulong                     d,
     414             :                                           fd_wksp_t *               wksp,     /* Assumes current local join */
     415    78520242 :                                           fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     416    78520242 :   ulong part_max = wksp->part_max;
     417    78520242 :   return fd_wksp_private_pinfo_idx( pinfo[ d ].same_cidx )>=part_max;
     418    78520242 : }
     419             : 
     420             : /* fd_wksp_private_free_treap_same_remove removes the first partition
     421             :    from d's same list.  Assumes the caller knows d's same list is not
     422             :    empty.  The caller is promised that returned partition has the same
     423             :    size as d. */
     424             : 
     425             : static inline ulong
     426             : fd_wksp_private_free_treap_same_remove( ulong                     d,
     427             :                                         fd_wksp_t *               wksp,     /* Assumes current local join */
     428     3604728 :                                         fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     429     3604728 :   ulong part_max = wksp->part_max;
     430     3604728 :   ulong i = fd_wksp_private_pinfo_idx( pinfo[ d ].same_cidx );
     431     3604728 :   ulong j = fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx );
     432     3604728 :   /**/             pinfo[ d ].same_cidx = fd_wksp_private_pinfo_cidx( j );
     433     3604728 :   if( j<part_max ) pinfo[ j ].parent_cidx = fd_wksp_private_pinfo_cidx( d );
     434     3604728 :   pinfo[ i ].in_same     = 0U;
     435     3604728 :   pinfo[ i ].same_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     436     3604728 :   pinfo[ i ].parent_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     437     3604728 :   return i;
     438     3604728 : }
     439             : 
     440             : /* fd_wksp_private_free_treap_remove removes partition d from wksp's
     441             :    free treap.  Assumes d in the free treap, not in the used treap, not
     442             :    in the idle stack.  Does not care if d is in the partitioning.
     443             :    Reasonably fast O(lg N) where N is the number of free partitions.
     444             :    This might consume a wksp cycle tag and clobber partition stack_cidx
     445             :    and cycle_tag fields.  There is an edge case where d's can be swapped
     446             :    with another same sized partition.
     447             : 
     448             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     449             :    (negative) on failure (logs details for failure).  Reasons for
     450             :    failure include d is not in [0,part_max) and treap internal
     451             :    connectivity issues were detected (complete detection not
     452             :    guaranteed).
     453             : 
     454             :    Note that failure reasons are either user error or memory corruption.
     455             :    This cannot fail in normal operating circumstances. */
     456             : 
     457             : int
     458             : fd_wksp_private_free_treap_remove( ulong                     d,
     459             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     460             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     461             : 
     462             : /* private admin APIs *************************************************/
     463             : 
     464             : /* fd_wksp_private_lock locks wksp.  Assumes wksp is a current local
     465             :    join.  If wksp is already locked, this will wait for the caller.  If
     466             :    this detects that the caller died while holding the lock, it will try
     467             :    to steal the lock from the dead caller and cleanup any incomplete
     468             :    operation the caller was doing.  Returns FD_WKSP_SUCCESS (0) if the
     469             :    lock was acquired or FD_WKSP_ERR_CORRUPT if the lock could not be
     470             :    obtained because memory corruption was detected while trying to
     471             :    recover from a dead caller that corrupted the wksp memory. */
     472             : 
     473             : int
     474             : fd_wksp_private_lock( fd_wksp_t * wksp );
     475             : 
     476             : /* fd_wksp_private_unlock unlocks a locked wksp.  Assumes wksp is a
     477             :    current local join and the caller has the lock */
     478             : 
     479             : static inline void
     480   545652684 : fd_wksp_private_unlock( fd_wksp_t * wksp ) {
     481   545652684 :   FD_COMPILER_MFENCE();
     482   545652684 :   FD_VOLATILE( wksp->owner ) = ULONG_MAX;
     483   545652684 :   FD_COMPILER_MFENCE();
     484   545652684 : }
     485             : 
     486             : /* private checkpt/restore APIs ***************************************/
     487             : /* FIXME: MOVE THIS TO PUBLIC HEADER? */
     488             : 
     489             : /* FD_WKSP_CHECKPT_{V1,V2}_{BINFO,UINFO}_MAX give the maximum byte size
     490             :    (including the terminating '\0') of a decompressed {v1,v2} checkpt
     491             :    {build,user} info cstr. */
     492             : 
     493           9 : #define FD_WKSP_CHECKPT_V1_BINFO_MAX (16384UL)
     494           9 : #define FD_WKSP_CHECKPT_V1_UINFO_MAX (16384UL)
     495             : 
     496             : #define FD_WKSP_CHECKPT_V2_BINFO_MAX (16384UL)
     497             : #define FD_WKSP_CHECKPT_V2_UINFO_MAX (16384UL)
     498             : 
     499             : /* A fd_wksp_checkpt_v2_hdr_t gives the byte layout of frame 0 of a wksp
     500             :    v2 checkpt.  This frame contains the style, compression algo used for
     501             :    the info, cgroup and appendix frames and fd_wksp_preview information
     502             :    uncompressed. */
     503             : 
     504             : struct fd_wksp_checkpt_v2_hdr {
     505             :   ulong magic;                     /* Must be first, ==FD_WKSP_MAGIC */
     506             :   int   style;                     /* Must be second, wksp checkpt style */
     507             :   int   frame_style_compressed;    /* frame style used for compressed frames */
     508             :   uint  reserved;                  /* header padding */
     509             :   char  name[ FD_SHMEM_NAME_MAX ]; /* cstr holding the original wksp name (note: FD_SHMEM_NAME_MAX==FD_LOG_NAME_MAX==40) */
     510             :   uint  seed;                      /* wksp seed when checkpointed (probably same used to construct) */
     511             :   ulong part_max;                  /* part_max used to construct the wksp */
     512             :   ulong data_max;                  /* data_max used to construct the wksp */
     513             : };
     514             : 
     515             : typedef struct fd_wksp_checkpt_v2_hdr fd_wksp_checkpt_v2_hdr_t;
     516             : 
     517             : /* A fd_wksp_checkpt_v2_info_t gives the byte layout of frame 1 of a
     518             :    wksp v2 checkpt.  frame 1 immediately follows frame 0 and this frame
     519             :    contains the info structure followed compactly by the corresponding
     520             :    cstr (including the terminating '\0') stored consecutively in the
     521             :    same order.  The size fields indicate the buffer layout.  This frame
     522             :    is compressed according hdr/ftr specification. */
     523             : 
     524             : struct fd_wksp_checkpt_v2_info {
     525             :   ulong mode;
     526             :   long  wallclock;
     527             :   ulong app_id;
     528             :   ulong thread_id;
     529             :   ulong host_id;
     530             :   ulong cpu_id;
     531             :   ulong group_id;
     532             :   ulong tid;
     533             :   ulong user_id;
     534             :   /* FIXME: CONSIDER MAKING THESE ALL UCHAR / USHORT / 4 BYTE RESERVED */
     535             :   ulong sz_app;    /* in [1,FD_LOG_NAME_MAX ~ 40B] */
     536             :   ulong sz_thread; /* " */
     537             :   ulong sz_host;   /* " */
     538             :   ulong sz_cpu;    /* " */
     539             :   ulong sz_group;  /* " */
     540             :   ulong sz_user;   /* " */
     541             :   ulong sz_path;   /* in [1,PATH_MAX ~ 4KiB] */
     542             :   ulong sz_binfo;  /* in [1,FD_WKSP_CHECKPT_V2_BINFO_MAX ~ 16KiB] */
     543             :   ulong sz_uinfo;  /* in [1,FD_WKSP_CHECKPT_V2_UINFO_MAX ~ 16KiB] */
     544             : };
     545             : 
     546             : typedef struct fd_wksp_checkpt_v2_info fd_wksp_checkpt_v2_info_t;
     547             : 
     548             : /* A v2 info frame is followed by zero or more volumes.  A volume
     549             :    consists of zero or more cgroup frames and an appendix frame.
     550             :    Volumes are followed by a frame with a footer command and then an
     551             :    uncompressed footer frame.
     552             : 
     553             :    A cgroup frame starts with a zero or more meta commands that describe
     554             :    the allocations it contains followed by a data command that indicates
     555             :    the cgroup data section follows.
     556             : 
     557             :    An appendix frame starts with an appendix command, giving the number
     558             :    of cgroup frames it covers and the offset to the previous appendix
     559             :    frame (0 if the first appendix frame).  This is followed by a ulong
     560             :    array with checkpt offsets to those cgroup frames followed a ulong
     561             :    array with the number of allocations in each cgroup frame.  An
     562             :    appendix covers all cgroup frames between it and the previous
     563             :    appendix frame (or info frame if the first appendix).
     564             : 
     565             :    The last volume is followed by a compressed frame with a sole volumes
     566             :    command.  The volumes command gives the offset of the appendix of the
     567             :    last volume (or 0 if there are no volumes).  (This allows the final
     568             :    frame to be uncompressed while all the volumes can be compressed.)
     569             : 
     570             :    An uncompressed footer frame follows indicating the v2 checkpt is
     571             :    done.  The command gives the total number of cgroup frames in the
     572             :    checkpt and the offset to the last volume's appendix (or 0 if no
     573             :    volumes).
     574             : 
     575             :    A fd_wksp_checkpt_v2_cmd_t supports writing an arbitrarily large
     576             :    checkpt single pass with only small upfront bounded allocation while
     577             :    supporting both streaming and parallel restore of those frames. */
     578             : 
     579             : union fd_wksp_checkpt_v2_cmd {
     580             :   struct { ulong tag; /* > 0 */ ulong gaddr_lo;                     ulong gaddr_hi;                    } meta;
     581             :   struct { ulong tag; /* ==0 */ ulong cgroup_cnt; /* ==ULONG_MAX */ ulong frame_off; /* ==ULONG_MAX */ } data;
     582             :   struct { ulong tag; /* ==0 */ ulong cgroup_cnt; /* < ULONG_MAX */ ulong frame_off; /* < ULONG_MAX */ } appendix;
     583             :   struct { ulong tag; /* ==0 */ ulong cgroup_cnt; /* ==ULONG_MAX */ ulong frame_off; /* < ULONG_MAX */ } volumes;
     584             : };
     585             : 
     586             : typedef union fd_wksp_checkpt_v2_cmd fd_wksp_checkpt_v2_cmd_t;
     587             : 
     588             : FD_FN_PURE static inline int
     589          75 : fd_wksp_checkpt_v2_cmd_is_meta( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     590          75 :   return cmd->meta.tag > 0UL;
     591          75 : }
     592             : 
     593             : FD_FN_PURE static inline int
     594          21 : fd_wksp_checkpt_v2_cmd_is_data( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     595          21 :   return (cmd->data.tag==0UL) & (cmd->data.cgroup_cnt==ULONG_MAX) & (cmd->data.frame_off==ULONG_MAX);
     596          21 : }
     597             : 
     598             : FD_FN_PURE static inline int
     599          63 : fd_wksp_checkpt_v2_cmd_is_appendix( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     600          63 :   return (cmd->appendix.tag==0UL) & (cmd->appendix.cgroup_cnt<ULONG_MAX) & (cmd->appendix.frame_off<ULONG_MAX);
     601          63 : }
     602             : 
     603             : FD_FN_PURE static inline int
     604           0 : fd_wksp_checkpt_v2_cmd_is_volumes( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     605           0 :   return (cmd->volumes.tag==0UL) & (cmd->volumes.cgroup_cnt==ULONG_MAX) & (cmd->volumes.frame_off<ULONG_MAX);
     606           0 : }
     607             : 
     608             : /* A fd_wksp_checkpt_v2_ftr_t gives the byte layout of the final frame
     609             :    of a wksp v2 checkpt.  This frame contains this footer uncompressed.
     610             :    This is wksp checkpt header backwards plus some additional
     611             :    information to allow users to seek from the end of the checkpt to the
     612             :    header (checkpt_sz), to the appendix frame (frame_off_appendix) and
     613             :    do any allocations upfront necessary to completely unpack the
     614             :    checkpt. */
     615             : 
     616             : struct fd_wksp_checkpt_v2_ftr {
     617             :   ulong alloc_cnt;                 /* total number of allocations in checkpt */
     618             :   ulong cgroup_cnt;                /* total number of cgroups     in checkpt */
     619             :   ulong volume_cnt;                /* total number of volumes     in checkpt */
     620             :   ulong frame_off;                 /* byte offset (relative to header initial byte) of the volumes command */
     621             :   ulong checkpt_sz;                /* checkpt byte size, from header initial byte to the footer final byte inclusive (note that
     622             :                                       this can be used to convert offsets relative to header initial byte to offsets relative to
     623             :                                       the end-of-file / the one past the final footer byte) */
     624             :   ulong data_max;                  /* should match header */
     625             :   ulong part_max;                  /* " */
     626             :   uint  seed;                      /* " */
     627             :   char  name[ FD_SHMEM_NAME_MAX ]; /* " */
     628             :   uint  reserved;                  /* " */
     629             :   int   frame_style_compressed;    /* " */
     630             :   int   style;                     /* " */
     631             :   ulong unmagic;                   /* ==~FD_WKSP_MAGIC */
     632             : };
     633             : 
     634             : typedef struct fd_wksp_checkpt_v2_ftr fd_wksp_checkpt_v2_ftr_t;
     635             : 
     636             : /* fd_wksp_private_{checkpt,restore,printf}_v1 provide the v1
     637             :    implementations of {checkpt,restore,printf}.  That is, checkpt_v1
     638             :    will only write a v1 style checkpt while the {restore,printt}_v1 can
     639             :    assume that the path exclusively contains a v1 style checkpt.  These
     640             :    can assume that the input arguments have been validated by their
     641             :    caller.  The printf implementation can further assume verbose is
     642             :    positive and the verbose 0 information has already been printed.  For
     643             :    checkpt/restore, if tpool is non-NULL, the operation will be
     644             :    parallelized over tpool threads [t0,t1).  Assumes the caller is
     645             :    thread t0 and threads (t0,t1) are available for thread dispatch. */
     646             : 
     647             : int
     648             : fd_wksp_private_checkpt_v1( fd_tpool_t * tpool,
     649             :                             ulong        t0,
     650             :                             ulong        t1,
     651             :                             fd_wksp_t *  wksp,
     652             :                             char const * path,
     653             :                             ulong        mode,
     654             :                             char const * uinfo );
     655             : 
     656             : int
     657             : fd_wksp_private_restore_v1( fd_tpool_t * tpool,
     658             :                             ulong        t0,
     659             :                             ulong        t1,
     660             :                             fd_wksp_t *  wksp,
     661             :                             char const * path,
     662             :                             uint         new_seed );
     663             : 
     664             : int
     665             : fd_wksp_private_printf_v1( int          fd,
     666             :                            char const * path,
     667             :                            int          verbose );
     668             : 
     669             : /* Similarly for v2.  Note that style==FD_WKSP_CHECKPT_STYLE_V3 in the
     670             :    fd_wksp_checkpt function becomes a FD_WKSP_CHECKPT_STYLE_V2 with a
     671             :    FD_CHECKPT_FRAME_STYLE_LZ4 cgroup frames in the checkpt itself. */
     672             : 
     673             : int
     674             : fd_wksp_private_checkpt_v2( fd_tpool_t * tpool,
     675             :                             ulong        t0,
     676             :                             ulong        t1,
     677             :                             fd_wksp_t *  wksp,
     678             :                             char const * path,
     679             :                             ulong        mode,
     680             :                             char const * uinfo,
     681             :                             int          frame_style_compresed );
     682             : 
     683             : int
     684             : fd_wksp_private_restore_v2( fd_tpool_t * tpool,
     685             :                             ulong        t0,
     686             :                             ulong        t1,
     687             :                             fd_wksp_t *  wksp,
     688             :                             char const * path,
     689             :                             uint         new_seed );
     690             : 
     691             : int
     692             : fd_wksp_private_printf_v2( int          fd,
     693             :                            char const * path,
     694             :                            int          verbose );
     695             : 
     696             : FD_PROTOTYPES_END
     697             : 
     698             : #endif /* HEADER_fd_src_util_wksp_fd_wksp_private_h */

Generated by: LCOV version 1.14