LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_private.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 67 76 88.2 %
Date: 2025-01-08 12:08:44 Functions: 58 1908 3.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_wksp_fd_wksp_private_h
       2             : #define HEADER_fd_src_util_wksp_fd_wksp_private_h
       3             : 
       4             : #include "fd_wksp.h"
       5             : 
       6             : /* If FD_WKSP_LOCK_RECLAIM==0, do not try to recover the lock from
       7             :    dead processes.  This is useful, for example, if we know that the
       8             :    lock will not get acquired by another process, or that if another
       9             :    acquiring process dies that all potential users will get exited.  It
      10             :    prevents a syscall on various common workspace paths (eg, alloc). */
      11             : 
      12             : #ifndef FD_WKSP_LOCK_RECLAIM
      13             : #define FD_WKSP_LOCK_RECLAIM 0
      14             : #endif
      15             : 
      16             : /* FD_WKSP_PRIVATE_PINFO_IDX_NULL is the pinfo index value used to
      17             :    indicate NULL */
      18             : 
      19 26418427791 : #define FD_WKSP_PRIVATE_PINFO_IDX_NULL ((ulong)UINT_MAX)
      20             : 
      21             : /* A fd_wksp_private_pinfo_t specifies details about a partition in a
      22             :    workspace and its relationship to other partitions in that workspace.
      23             : 
      24             :    FD_WKSP_PRIVATE_PINFO_ALIGN is an integer power of 2 and
      25             :    FD_WKSP_PRIVATE_PINFO_FOOTPRINT will be a multiple of align.
      26             : 
      27             :    If a partition is not idle:
      28             : 
      29             :    - [gaddr_lo,gaddr_hi) specify the range of offsets covered by this
      30             :      partition.
      31             : 
      32             :        wksp_gaddr_lo <= gaddr_lo < gaddr_hi <= wksp_gaddr_hi
      33             : 
      34             :      such that partitions always have at least 1 byte and are contained
      35             :      within the workspace's data region.
      36             : 
      37             :    - tag==0 indicates this partition is space in the workspace free
      38             :      for use and the partition is in the free treap, fast findable by
      39             :      its size.  Otherwise, this partition is allocated and the partition
      40             :      is in the used treap, fast O(1) findable by any in range gaddr.
      41             : 
      42             :    - prev_cidx, next_cidx give the index (in compressed form) of the
      43             :      previous / next partition if present or IDX_NULL if this is the
      44             :      workspace partition head / tail partition (in which case gaddr_lo /
      45             :      gaddr_hi will be wksp->gaddr_lo / wksp->gaddr_hi).  That is, for
      46             :      partition idx where idx is in [0,wksp->part_max):
      47             : 
      48             :        ulong prev_idx = fd_wksp_private_pinfo_idx( pinfo[ idx ].prev_cidx );
      49             :        if( fd_wksp_private_pinfo_idx_is_null( prev_idx ) ) {
      50             :          ... at this point:
      51             :          ...   idx == fd_wksp_private_pinfo_idx( wksp->part_head_cidx ) );
      52             :          ...   pinfo[ idx ].gaddr_lo == wksp->gaddr_lo );
      53             :        } else {
      54             :          ... at this point:
      55             :          ...   idx ==_fd_wksp_private_pinfo_idx( pinfo[ prev_idx ].next_cidx );
      56             :          ...   pinfo[ idx ].gaddr_lo == pinfo[ prev_idx ].gaddr_hi;
      57             :        }
      58             : 
      59             :      and:
      60             : 
      61             :        ulong next_idx = fd_wksp_private_pinfo_idx( pinfo[ idx ].next_cidx );
      62             :        if( fd_wksp_private_pinfo_idx_is_null( next_idx ) ) {
      63             :          ... at this point:
      64             :          ...   idx == fd_wksp_private_pinfo_idx( wksp->part_tail_cidx ) );
      65             :          ...   pinfo[ idx ].gaddr_hi == wksp->gaddr_hi );
      66             :        } else {
      67             :          ... at this point:
      68             :          ...   idx ==_fd_wksp_private_pinfo_idx( pinfo[ next_idx ].prev_cidx );
      69             :          ...   pinfo[ idx ].gaddr_hi == pinfo[ next_idx ].gaddr_lo;
      70             :        }
      71             : 
      72             :    - If partition idx is in the used treap, {left,right}_cidx specify
      73             :      the partition indices of the root of the left and right subtrees.
      74             : 
      75             :      The used treap obeys the binary search tree property that all
      76             :      partitions in the left/right subtree (if any) cover a range of
      77             :      offsets strictly lower/higher than range covered by idx.
      78             : 
      79             :      parent_cidx specifies idx's parent tree (if any).  If idx is the
      80             :      wksp used tree root, parent_cidx will specify IDX_NULL and
      81             :      wksp->part_used_cidx will specify idx.
      82             : 
      83             :      The used treap also obeys the heap property to make it well
      84             :      balanced on average.  Specifically, the idx's parent's heap
      85             :      priority will be at least idx's heap priority.
      86             : 
      87             :      in_same will be 0 and same_cidx will specify IDX_NULL as no used
      88             :      partitions can overlap.
      89             : 
      90             :    - If partition idx is in the free treap, if partition idx is not
      91             :      in a list of same sized partitions, in_same will be 0 and
      92             :      {left,right}_cidx specify the partition indices of the root of the
      93             :      left and right subtrees.
      94             : 
      95             :      The free treap obeys the binary search tree property that all
      96             :      partitions in the left/right subtree (if any) have partition sizes
      97             :      strictly lower/higher than partition idx's size.
      98             : 
      99             :      parent_cidx specifies idx's parent tree (if any).  If idx is the
     100             :      wksp free tree root, parent_cidx will specify IDX_NULL and
     101             :      wksp->part_free_cidx will specify idx.
     102             : 
     103             :      The free treap also obeys the heap property to make it well
     104             :      balanced on average.  Specifically, the idx's parent's heap
     105             :      priority will be at least idx's heap priority.
     106             : 
     107             :      If there are additional partitions of the same size to partition
     108             :      idx, same_cidx will refer to the next partition of the same size.
     109             : 
     110             :      If partition idx is in a list of same sized partitions, in_same
     111             :      will be 1 and parent_cidx / same_cidx will specify the prev / next
     112             :      index of additional partitions of the same size.  same_cidx will
     113             :      specify IDX_NULL if no more.
     114             : 
     115             :    - heap_prio is a random value used as described above.
     116             : 
     117             :    - stack_cidx and cycle_tag are for internal use */
     118             : 
     119             : /* TODO: Consider align 32/ footprint 96 without compressed indices if
     120             :    ever needing more than ~4B partitions. */
     121             : 
     122             : #define FD_WKSP_PRIVATE_PINFO_ALIGN     (64UL) /* At most FD_WKSP_ALIGN */
     123             : #define FD_WKSP_PRIVATE_PINFO_FOOTPRINT (64UL)
     124             : 
     125             : struct __attribute__((aligned(FD_WKSP_PRIVATE_PINFO_ALIGN))) fd_wksp_private_pinfo {
     126             :   ulong gaddr_lo;       /* If in idle stack, 0 */
     127             :   ulong gaddr_hi;       /* ",                0 */
     128             :   ulong tag;            /* ",                0 */
     129             :   uint  heap_prio : 31; /* 30 bit priority and 1 bit free to use for infinite priority bulk tree ops */
     130             :   uint  in_same   :  1; /* 1 if in a same list and 0 otherwise */
     131             :   uint  prev_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     132             :   uint  next_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     133             :   uint  left_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     134             :   uint  right_cidx;     /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     135             :   uint  parent_cidx;    /* ",                cidx of next idle or fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) if no more */
     136             :   uint  same_cidx;      /* ",                fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
     137             :   uint  stack_cidx;     /* internal use */
     138             :   ulong cycle_tag;      /* internal use */
     139             : };
     140             : 
     141             : typedef struct fd_wksp_private_pinfo fd_wksp_private_pinfo_t;
     142             : 
     143             : /* FD_WKSP_MAGIC is an ideally unique number that specifies the precise
     144             :    memory layout of a fd_wksp. */
     145             : 
     146         531 : #define FD_WKSP_MAGIC (0xF17EDA2C3731C591UL) /* F17E=FIRE,DA2C/3R<>DANCER,31/C59<>WKSP,0<>0 --> FIRE DANCER WKSP VERSION 1 */
     147             : 
     148             : /* fd_wksp_private specifies the detailed layout of the internals of a
     149             :    fd_wksp_t */
     150             : 
     151             : struct fd_wksp_private {
     152             : 
     153             :   /* This point is FD_WKSP_ALIGN aligned */
     154             : 
     155             :   /* This fields are static and mostly in the first cache line */
     156             : 
     157             :   ulong magic;                     /* ==FD_WKSP_MAGIC */
     158             :   ulong part_max;                  /* Max wksp partitions */
     159             :   ulong data_max;                  /* Data region */
     160             :   ulong gaddr_lo;                  /* ==fd_wksp_private_data_off( part_max ), data region covers offsets [gaddr_lo,gaddr_hi) */
     161             :   ulong gaddr_hi;                  /* ==gaddr_lo + data_max,                  offset gaddr_hi is to 1 byte footer */
     162             :   char  name[ FD_SHMEM_NAME_MAX ]; /* (Convenience) backing fd_shmem region cstr name */
     163             :   uint  seed;                      /* Heap priority random number seed, arbitrary */
     164             : 
     165             :   /* These fields are dynamic and in the adjacent cache line */
     166             : 
     167             :   uint  idle_top_cidx;             /* Stack of partition infos not in use, parent_idx is next pointer */
     168             :   uint  part_head_cidx;            /* Index for info about the leftmost partition */
     169             :   uint  part_tail_cidx;            /* Index for info about the rightmost partition */
     170             :   uint  part_used_cidx;            /* Treap of partitions that are currently used (tag!=0), searchable by gaddr */
     171             :   uint  part_free_cidx;            /* Treap of partitions that are currently free (tag==0), searchable by size */
     172             :   ulong cycle_tag;                 /* Used for cycle detection */
     173             :   ulong owner;                     /* thread group id of the owner or NULL otherwise */
     174             : 
     175             :   /* IMPORTANT!  The "single-source-of-truth" for what is currently
     176             :      used (and its tags) is the set of non-zero tagged partitions in the
     177             :      partition info array.  The idle stack, partition list, used treap
     178             :      and free treap are auxiliary data structuring that can be
     179             :      reconstructed at any time from this single source of truth.
     180             : 
     181             :      Conversely, if there accidental or deliberate data corruption of
     182             :      the wksp metadata resulting in a conflict between what is stored
     183             :      in the partition info array and the auxiliary data structures,
     184             :      the partition info array governs. */
     185             : 
     186             :   /* Padding to FD_WKSP_PRIVATE_PINFO_ALIGN here */
     187             : 
     188             :   /* part_max pinfo here */
     189             :   /* data_max byte data region here */
     190             :   /* 1 footer byte here */
     191             :   /* Padding to FD_WKSP_ALIGN here */
     192             : };
     193             : 
     194             : FD_PROTOTYPES_BEGIN
     195             : 
     196             : /* fd_wksp_private_pinfo_sz returns the size of a partition in bytes.
     197             :    Assumes pinfo points to the pinfo of a partition in a current local
     198             :    join.  Will be positive. */
     199             : 
     200             : FD_FN_PURE static inline ulong
     201  7136493510 : fd_wksp_private_pinfo_sz( fd_wksp_private_pinfo_t const * pinfo ) {
     202  7136493510 :   return pinfo->gaddr_hi - pinfo->gaddr_lo;
     203  7136493510 : }
     204             : 
     205             : /* fd_wksp_private_{part,data}_off return the wksp offset of the
     206             :    pinfo array and the data region.  data_off assumes part_max is a
     207             :    value that will not overflow. */
     208             : 
     209             : FD_FN_CONST static inline ulong
     210           0 : fd_wksp_private_pinfo_off( void ) {
     211           0 :   return 128UL; /* fd_ulong_align_up( sizeof(fd_wksp_t), FD_WKSP_PRIVATE_PINFO_ALIGN ); */
     212           0 : }
     213             : 
     214             : FD_FN_CONST static inline ulong
     215        5364 : fd_wksp_private_data_off( ulong part_max ) {
     216        5364 :   return fd_wksp_private_pinfo_off() + part_max*sizeof(fd_wksp_private_pinfo_t);
     217        5364 : }
     218             : 
     219             : /* fd_wksp_private_pinfo returns the location of wksp pinfo array in the
     220             :    caller's address space.  Assumes wksp is a current local join.
     221             :    fd_wksp_private_pinfo_const is a const-correct version. */
     222             : 
     223             : FD_FN_CONST static inline fd_wksp_private_pinfo_t *
     224   587977926 : fd_wksp_private_pinfo( fd_wksp_t * wksp ) {
     225   587977926 :   return (fd_wksp_private_pinfo_t *)(((ulong)wksp) + fd_wksp_private_pinfo_off());
     226   587977926 : }
     227             : 
     228             : FD_FN_CONST static inline fd_wksp_private_pinfo_t const *
     229           0 : fd_wksp_private_pinfo_const( fd_wksp_t const * wksp ) {
     230           0 :   return (fd_wksp_private_pinfo_t const *)(((ulong)wksp) + fd_wksp_private_pinfo_off());
     231           0 : }
     232             : 
     233             : /* fd_wksp_private_pinfo_{cidx,idx} compresses / uncompresses a pinfo index */
     234             : 
     235  8698208668 : static inline uint  fd_wksp_private_pinfo_cidx( ulong idx  ) { return (uint) idx;  }
     236 17330034808 : static inline ulong fd_wksp_private_pinfo_idx ( uint  cidx ) { return (ulong)cidx; }
     237             : 
     238             : /* fd_wksp_private_pinfo_idx_is_null returns 1 if idx is
     239             :    FD_WKSP_PRIVATE_PINFO_IDX_NULL and 0 otherwise */
     240             : 
     241 22318511025 : static inline int fd_wksp_private_pinfo_idx_is_null( ulong idx ) { return idx==FD_WKSP_PRIVATE_PINFO_IDX_NULL; }
     242             : 
     243             : /* pinfo idle stack APIs **********************************************/
     244             : 
     245             : /* fd_wksp_private_idle_stack_is_empty returns 1 if there are no idle
     246             :    partitions and 0 otherwise.  Also returns 1 if corruption is
     247             :    detected.  Assumes wksp is a current local join. */
     248             : 
     249             : static inline int
     250        7446 : fd_wksp_private_idle_stack_is_empty( fd_wksp_t * wksp ) {
     251        7446 :   return fd_wksp_private_pinfo_idx( wksp->idle_top_cidx ) >= wksp->part_max;
     252        7446 : }
     253             : 
     254             : /* fd_wksp_private_idle_stack_pop pops an idle partition off wksp's idle
     255             :    stack.  Assumes the caller knows idle stack is not empty.  The caller
     256             :    is promised that the popped partition has [gaddr_lo,gaddr_hi) = [0,0)
     257             :    tag 0, {prev, next, left, right, same, parent}_cidx specify IDX_NULL.
     258             :    Further, heap_prio should have been assigned a random value.
     259             :    stack_idx and cycle_tag are for internal use. */
     260             : 
     261             : static inline ulong                                                 /* Assumes in [0,part_max) */
     262             : fd_wksp_private_idle_stack_pop( fd_wksp_t *               wksp,     /* Assumes current local join */
     263   116699511 :                                 fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     264   116699511 :   ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
     265             : # if FD_HAS_DEEPASAN
     266             :   fd_asan_unpoison( &pinfo[ i ], FD_WKSP_PRIVATE_PINFO_FOOTPRINT );
     267             : # endif
     268   116699511 :   wksp->idle_top_cidx = pinfo[ i ].parent_cidx;
     269   116699511 :   pinfo[ i ].parent_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     270   116699511 :   return i;
     271   116699511 : }
     272             : 
     273             : /* fd_wksp_private_idle_stack_push pushes partition i onto the idle
     274             :    stack.  Assumes the caller knows i is not currently in the idle
     275             :    stack, partitioning, used treap or free treap. */
     276             : 
     277             : static inline void
     278             : fd_wksp_private_idle_stack_push( ulong                     i,        /* Assumes in [0,part_max) */
     279             :                                  fd_wksp_t *               wksp,     /* Assumes current local join */
     280   152579889 :                                  fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     281   152579889 :   pinfo[ i ].gaddr_lo    = 0UL;
     282   152579889 :   pinfo[ i ].gaddr_hi    = 0UL;
     283   152579889 :   pinfo[ i ].tag         = 0U;
     284   152579889 :   pinfo[ i ].in_same     = 0U;
     285   152579889 :   pinfo[ i ].prev_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     286   152579889 :   pinfo[ i ].next_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     287   152579889 :   pinfo[ i ].left_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     288   152579889 :   pinfo[ i ].right_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     289   152579889 :   pinfo[ i ].same_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     290   152579889 :   pinfo[ i ].parent_cidx = wksp->idle_top_cidx;
     291   152579889 :   wksp->idle_top_cidx = fd_wksp_private_pinfo_cidx( i );
     292             : 
     293             : # if FD_HAS_DEEPASAN
     294             :   fd_asan_poison( &pinfo[ i ], FD_WKSP_PRIVATE_PINFO_FOOTPRINT );
     295             : # endif
     296   152579889 : }
     297             : 
     298             : /* pinfo used treap APIs **********************************************/
     299             : 
     300             : /* fd_wksp_private_used_treap_query queries wksp's used treap for the
     301             :    used partition that holds gaddr.  On success, returns the requested
     302             :    partition idx, in [0,part_max), and, on failure, returns IDX_NULL.
     303             :    Reasons for failure include gaddr is not in a used partition and
     304             :    internal treap corruption detected.  Might consume a wksp cycle tag
     305             :    and clobber partition cycle tags.  Reasonably fast O(lg N) where N is
     306             :    the number of used partitions. */
     307             : 
     308             : ulong
     309             : fd_wksp_private_used_treap_query( ulong                     gaddr,
     310             :                                   fd_wksp_t *               wksp,
     311             :                                   fd_wksp_private_pinfo_t * pinfo );
     312             : 
     313             : /* fd_wksp_private_used_treap_insert inserts partition n into wksp's
     314             :    used treap.  Assumes n is not in the idle stack, used treap or free
     315             :    treap.  Does not care if n is in the partitioning or not.  Reasonably
     316             :    fast O(lg N) where N is the number of used partitions.
     317             : 
     318             :    Partition n should have [gaddr_lo,gaddr_hi) and heap_prio initialized
     319             :    on entry (heap_prio should be a random value).  tag need not be
     320             :    initialized but it is assumed that the caller will set the tag to its
     321             :    final value on success to make the partition officially used.  This
     322             :    will initialize {in_same, left, right, same, parent}_cidx.  This will
     323             :    ignore {prev,next}_cidx.  This might consume a wksp cycle tag and
     324             :    clobber partition stack_cidx and cycle_tag fields.
     325             : 
     326             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     327             :    (negative) on failure (logs details for failure).  Reasons for
     328             :    failure include n is not in [0,part_max), n's range is not in wksp
     329             :    data region, n was detected as already inserted (this detection is
     330             :    not guaranteed), treap internal connectivity issues were detected
     331             :    (complete detection not guaranteed), and n overlaps with at least one
     332             :    element already inserted into the treap.
     333             : 
     334             :    On failure n and the treap itself were not modified (except possibly
     335             :    clobbering of stack_cidx and cycle_tag).  Note that failure reasons
     336             :    are either user error or memory corruption.  This cannot fail in
     337             :    normal operating circumstances. */
     338             : 
     339             : int
     340             : fd_wksp_private_used_treap_insert( ulong                     n,
     341             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     342             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     343             : 
     344             : /* fd_wksp_private_used_treap_remove removes partition d from wksp's
     345             :    used treap.  Assumes d in the used treap, not in the free treap, not
     346             :    in the idle stack.  Does not care if d is in the partitioning.
     347             :    Reasonably fast O(lg N) where N is the number of used partitions.
     348             :    This might consume a wksp cycle tag and clobber partition stack_cidx
     349             :    and cycle_tag fields.
     350             : 
     351             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     352             :    (negative) on failure (logs details for failure).  Reasons for
     353             :    failure include d is not in [0,part_max) and treap internal
     354             :    connectivity issues were detected (complete detection not
     355             :    guaranteed).
     356             : 
     357             :    Note that failure reasons are either user error or memory corruption.
     358             :    This cannot fail in normal operating circumstances. */
     359             : 
     360             : int
     361             : fd_wksp_private_used_treap_remove( ulong                     d,
     362             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     363             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     364             : 
     365             : /* pinfo free treap APIs **********************************************/
     366             : 
     367             : /* fd_wksp_private_free_treap_query queries wksp's free treap for the
     368             :    smallest partition of at least sz.  On success, returns the index of
     369             :    a partition in the free treap suitable for sz, in [0,part_max), and,
     370             :    on failure, returns IDX_NULL.  Reasons for failure include sz zero,
     371             :    sz is larger than any free partition, and internal treap corruption
     372             :    was detected.  Might consume a wksp cycle tag and clobber partition
     373             :    cycle tags.  Reasonably fast O(lg N) where N is the number of used
     374             :    partitions. */
     375             : 
     376             : ulong
     377             : fd_wksp_private_free_treap_query( ulong                     sz,
     378             :                                   fd_wksp_t *               wksp,    /* Assumes current local join */
     379             :                                   fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     380             : 
     381             : /* fd_wksp_private_free_treap_insert inserts partition n into wksp's
     382             :    free treap.  Assumes n is not in the idle stack, used treap or free
     383             :    treap.  Does not care if n is in the partitioning or not.  Reasonably
     384             :    fast O(lg N) where N is the number of partitions in the free treap.
     385             : 
     386             :    Partition n should have [gaddr_lo,gaddr_hi) and heap_prio initialized
     387             :    on entry (heap_prio should be a random value).  tag need not be
     388             :    initialized but it is assumed that the caller will zero the tag
     389             :    beforehand to make the partition officially free.  This will
     390             :    initialize {in_same, left, right, same, parent}_cidx.  This will
     391             :    ignore {prev,next}_cidx.  This might consume a wksp cycle tag and
     392             :    clobber the partition stack_cidx and cycle_tag fields.
     393             : 
     394             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     395             :    (negative) on failure (logs details for failure).  Reasons for
     396             :    failure include n is not in [0,part_max), n's range is not in wksp
     397             :    data region, n's tag is not zero, n was detected as already inserted
     398             :    (this detection is not guaranteed), treap internal connectivity
     399             :    issues were detected (complete detection not guaranteed).
     400             : 
     401             :    If n's size exactly matches the size of partition already in the
     402             :    treap, n will be pushed onto that partition's same stack rather than
     403             :    inserted into the treap.
     404             : 
     405             :    On failure n and the treap itself were not modified (except possibly
     406             :    clobbering of stack_cidx and cycle_tag).  Note that failures reasons
     407             :    are either user error or memory corruption.  This has no failures in
     408             :    normal operating circumstances. */
     409             : 
     410             : int
     411             : fd_wksp_private_free_treap_insert( ulong                     n,
     412             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     413             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     414             : 
     415             : /* fd_wksp_private_free_treap_same_is_empty returns 1 if the same list
     416             :    for d is empty and 0 if not.  Returns 1 if corruption in detected.
     417             :    Assumes d is in the free treap. */
     418             : 
     419             : static inline int
     420             : fd_wksp_private_free_treap_same_is_empty( ulong                     d,
     421             :                                           fd_wksp_t *               wksp,     /* Assumes current local join */
     422    80603721 :                                           fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     423    80603721 :   ulong part_max = wksp->part_max;
     424    80603721 :   return fd_wksp_private_pinfo_idx( pinfo[ d ].same_cidx )>=part_max;
     425    80603721 : }
     426             : 
     427             : /* fd_wksp_private_free_treap_same_remove removes the first partition
     428             :    from d's same list.  Assumes the caller knows d's same list is not
     429             :    empty.  The caller is promised that returned partition has the same
     430             :    size as d. */
     431             : 
     432             : static inline ulong
     433             : fd_wksp_private_free_treap_same_remove( ulong                     d,
     434             :                                         fd_wksp_t *               wksp,     /* Assumes current local join */
     435     3604728 :                                         fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
     436     3604728 :   ulong part_max = wksp->part_max;
     437     3604728 :   ulong i = fd_wksp_private_pinfo_idx( pinfo[ d ].same_cidx );
     438     3604728 :   ulong j = fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx );
     439     3604728 :   /**/             pinfo[ d ].same_cidx = fd_wksp_private_pinfo_cidx( j );
     440     3604728 :   if( j<part_max ) pinfo[ j ].parent_cidx = fd_wksp_private_pinfo_cidx( d );
     441     3604728 :   pinfo[ i ].in_same     = 0U;
     442     3604728 :   pinfo[ i ].same_cidx   = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     443     3604728 :   pinfo[ i ].parent_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     444     3604728 :   return i;
     445     3604728 : }
     446             : 
     447             : /* fd_wksp_private_free_treap_remove removes partition d from wksp's
     448             :    free treap.  Assumes d in the free treap, not in the used treap, not
     449             :    in the idle stack.  Does not care if d is in the partitioning.
     450             :    Reasonably fast O(lg N) where N is the number of free partitions.
     451             :    This might consume a wksp cycle tag and clobber partition stack_cidx
     452             :    and cycle_tag fields.  There is an edge case where d's can be swapped
     453             :    with another same sized partition.
     454             : 
     455             :    Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
     456             :    (negative) on failure (logs details for failure).  Reasons for
     457             :    failure include d is not in [0,part_max) and treap internal
     458             :    connectivity issues were detected (complete detection not
     459             :    guaranteed).
     460             : 
     461             :    Note that failure reasons are either user error or memory corruption.
     462             :    This cannot fail in normal operating circumstances. */
     463             : 
     464             : int
     465             : fd_wksp_private_free_treap_remove( ulong                     d,
     466             :                                    fd_wksp_t *               wksp,    /* Assumes current local join */
     467             :                                    fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
     468             : 
     469             : /* private admin APIs *************************************************/
     470             : 
     471             : /* fd_wksp_private_lock locks wksp.  Assumes wksp is a current local
     472             :    join.  If wksp is already locked, this will wait for the caller.  If
     473             :    this detects that the caller died while holding the lock, it will try
     474             :    to steal the lock from the dead caller and cleanup any incomplete
     475             :    operation the caller was doing.  Returns FD_WKSP_SUCCESS (0) if the
     476             :    lock was acquired or FD_WKSP_ERR_CORRUPT if the lock could not be
     477             :    obtained because memory corruption was detected while trying to
     478             :    recover from a dead caller that corrupted the wksp memory. */
     479             : 
     480             : int
     481             : fd_wksp_private_lock( fd_wksp_t * wksp );
     482             : 
     483             : /* fd_wksp_private_unlock unlocks a locked wksp.  Assumes wksp is a
     484             :    current local join and the caller has the lock */
     485             : 
     486             : static inline void
     487   587977248 : fd_wksp_private_unlock( fd_wksp_t * wksp ) {
     488   587977248 :   FD_COMPILER_MFENCE();
     489   587977248 :   FD_VOLATILE( wksp->owner ) = ULONG_MAX;
     490   587977248 :   FD_COMPILER_MFENCE();
     491   587977248 : }
     492             : 
     493             : /* private checkpt/restore APIs ***************************************/
     494             : /* FIXME: MOVE THIS TO PUBLIC HEADER? */
     495             : 
     496             : /* FD_WKSP_CHECKPT_{V1,V2}_{BINFO,UINFO}_MAX give the maximum byte size
     497             :    (including the terminating '\0') of a decompressed {v1,v2} checkpt
     498             :    {build,user} info cstr. */
     499             : 
     500           9 : #define FD_WKSP_CHECKPT_V1_BINFO_MAX (16384UL)
     501           9 : #define FD_WKSP_CHECKPT_V1_UINFO_MAX (16384UL)
     502             : 
     503             : #define FD_WKSP_CHECKPT_V2_BINFO_MAX (16384UL)
     504             : #define FD_WKSP_CHECKPT_V2_UINFO_MAX (16384UL)
     505             : 
     506             : /* A fd_wksp_checkpt_v2_hdr_t gives the byte layout of frame 0 of a wksp
     507             :    v2 checkpt.  This frame contains the style, compression algo used for
     508             :    the info, cgroup and appendix frames and fd_wksp_preview information
     509             :    uncompressed. */
     510             : 
     511             : struct fd_wksp_checkpt_v2_hdr {
     512             :   ulong magic;                     /* Must be first, ==FD_WKSP_MAGIC */
     513             :   int   style;                     /* Must be second, wksp checkpt style */
     514             :   int   frame_style_compressed;    /* frame style used for compressed frames */
     515             :   uint  reserved;                  /* header padding */
     516             :   char  name[ FD_SHMEM_NAME_MAX ]; /* cstr holding the original wksp name (note: FD_SHMEM_NAME_MAX==FD_LOG_NAME_MAX==40) */
     517             :   uint  seed;                      /* wksp seed when checkpointed (probably same used to construct) */
     518             :   ulong part_max;                  /* part_max used to construct the wksp */
     519             :   ulong data_max;                  /* data_max used to construct the wksp */
     520             : };
     521             : 
     522             : typedef struct fd_wksp_checkpt_v2_hdr fd_wksp_checkpt_v2_hdr_t;
     523             : 
     524             : /* A fd_wksp_checkpt_v2_info_t gives the byte layout of frame 1 of a
     525             :    wksp v2 checkpt.  frame 1 immediately follows frame 0 and this frame
     526             :    contains the info structure followed compactly by the corresponding
     527             :    cstr (including the terminating '\0') stored consecutively in the
     528             :    same order.  The size fields indicate the buffer layout.  This frame
     529             :    is compressed according hdr/ftr specification. */
     530             : 
     531             : struct fd_wksp_checkpt_v2_info {
     532             :   ulong mode;
     533             :   long  wallclock;
     534             :   ulong app_id;
     535             :   ulong thread_id;
     536             :   ulong host_id;
     537             :   ulong cpu_id;
     538             :   ulong group_id;
     539             :   ulong tid;
     540             :   ulong user_id;
     541             :   /* FIXME: CONSIDER MAKING THESE ALL UCHAR / USHORT / 4 BYTE RESERVED */
     542             :   ulong sz_app;    /* in [1,FD_LOG_NAME_MAX ~ 40B] */
     543             :   ulong sz_thread; /* " */
     544             :   ulong sz_host;   /* " */
     545             :   ulong sz_cpu;    /* " */
     546             :   ulong sz_group;  /* " */
     547             :   ulong sz_user;   /* " */
     548             :   ulong sz_path;   /* in [1,PATH_MAX ~ 4KiB] */
     549             :   ulong sz_binfo;  /* in [1,FD_WKSP_CHECKPT_V2_BINFO_MAX ~ 16KiB] */
     550             :   ulong sz_uinfo;  /* in [1,FD_WKSP_CHECKPT_V2_UINFO_MAX ~ 16KiB] */
     551             : };
     552             : 
     553             : typedef struct fd_wksp_checkpt_v2_info fd_wksp_checkpt_v2_info_t;
     554             : 
     555             : /* A v2 info frame is followed by zero or more volumes.  A volume
     556             :    consists of zero or more cgroup frames and an appendix frame.
     557             :    Volumes are followed by a frame with a footer command and then an
     558             :    uncompressed footer frame.
     559             : 
     560             :    A cgroup frame starts with a zero or more meta commands that describe
     561             :    the allocations it contains followed by a data command that indicates
     562             :    the cgroup data section follows.
     563             : 
     564             :    An appendix frame starts with an appendix command, giving the number
     565             :    of cgroup frames it covers and the offset to the previous appendix
     566             :    frame (0 if the first appendix frame).  This is followed by a ulong
     567             :    array with checkpt offsets to those cgroup frames followed a ulong
     568             :    array with the number of allocations in each cgroup frame.  An
     569             :    appendix covers all cgroup frames between it and the previous
     570             :    appendix frame (or info frame if the first appendix).
     571             : 
     572             :    The last volume is followed by a compressed frame with a sole volumes
     573             :    command.  The volumes command gives the offset of the appendix of the
     574             :    last volume (or 0 if there are no volumes).  (This allows the final
     575             :    frame to be uncompressed while all the volumes can be compressed.)
     576             : 
     577             :    An uncompressed footer frame follows indicating the v2 checkpt is
     578             :    done.  The command gives the total number of cgroup frames in the
     579             :    checkpt and the offset to the last volume's appendix (or 0 if no
     580             :    volumes).
     581             : 
     582             :    A fd_wksp_checkpt_v2_cmd_t supports writing an arbitrarily large
     583             :    checkpt single pass with only small upfront bounded allocation while
     584             :    supporting both streaming and parallel restore of those frames. */
     585             : 
     586             : union fd_wksp_checkpt_v2_cmd {
     587             :   struct { ulong tag; /* > 0 */ ulong gaddr_lo;                     ulong gaddr_hi;                    } meta;
     588             :   struct { ulong tag; /* ==0 */ ulong cgroup_cnt; /* ==ULONG_MAX */ ulong frame_off; /* ==ULONG_MAX */ } data;
     589             :   struct { ulong tag; /* ==0 */ ulong cgroup_cnt; /* < ULONG_MAX */ ulong frame_off; /* < ULONG_MAX */ } appendix;
     590             :   struct { ulong tag; /* ==0 */ ulong cgroup_cnt; /* ==ULONG_MAX */ ulong frame_off; /* < ULONG_MAX */ } volumes;
     591             : };
     592             : 
     593             : typedef union fd_wksp_checkpt_v2_cmd fd_wksp_checkpt_v2_cmd_t;
     594             : 
     595             : FD_FN_PURE static inline int
     596          75 : fd_wksp_checkpt_v2_cmd_is_meta( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     597          75 :   return cmd->meta.tag > 0UL;
     598          75 : }
     599             : 
     600             : FD_FN_PURE static inline int
     601          21 : fd_wksp_checkpt_v2_cmd_is_data( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     602          21 :   return (cmd->data.tag==0UL) & (cmd->data.cgroup_cnt==ULONG_MAX) & (cmd->data.frame_off==ULONG_MAX);
     603          21 : }
     604             : 
     605             : FD_FN_PURE static inline int
     606          63 : fd_wksp_checkpt_v2_cmd_is_appendix( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     607          63 :   return (cmd->appendix.tag==0UL) & (cmd->appendix.cgroup_cnt<ULONG_MAX) & (cmd->appendix.frame_off<ULONG_MAX);
     608          63 : }
     609             : 
     610             : FD_FN_PURE static inline int
     611           0 : fd_wksp_checkpt_v2_cmd_is_volumes( fd_wksp_checkpt_v2_cmd_t const * cmd ) {
     612           0 :   return (cmd->volumes.tag==0UL) & (cmd->volumes.cgroup_cnt==ULONG_MAX) & (cmd->volumes.frame_off<ULONG_MAX);
     613           0 : }
     614             : 
     615             : /* A fd_wksp_checkpt_v2_ftr_t gives the byte layout of the final frame
     616             :    of a wksp v2 checkpt.  This frame contains this footer uncompressed.
     617             :    This is wksp checkpt header backwards plus some additional
     618             :    information to allow users to seek from the end of the checkpt to the
     619             :    header (checkpt_sz), to the appendix frame (frame_off_appendix) and
     620             :    do any allocations upfront necessary to completely unpack the
     621             :    checkpt. */
     622             : 
     623             : struct fd_wksp_checkpt_v2_ftr {
     624             :   ulong alloc_cnt;                 /* total number of allocations in checkpt */
     625             :   ulong cgroup_cnt;                /* total number of cgroups     in checkpt */
     626             :   ulong volume_cnt;                /* total number of volumes     in checkpt */
     627             :   ulong frame_off;                 /* byte offset (relative to header initial byte) of the volumes command */
     628             :   ulong checkpt_sz;                /* checkpt byte size, from header initial byte to the footer final byte inclusive (note that
     629             :                                       this can be used to convert offsets relative to header initial byte to offsets relative to
     630             :                                       the end-of-file / the one past the final footer byte) */
     631             :   ulong data_max;                  /* should match header */
     632             :   ulong part_max;                  /* " */
     633             :   uint  seed;                      /* " */
     634             :   char  name[ FD_SHMEM_NAME_MAX ]; /* " */
     635             :   uint  reserved;                  /* " */
     636             :   int   frame_style_compressed;    /* " */
     637             :   int   style;                     /* " */
     638             :   ulong unmagic;                   /* ==~FD_WKSP_MAGIC */
     639             : };
     640             : 
     641             : typedef struct fd_wksp_checkpt_v2_ftr fd_wksp_checkpt_v2_ftr_t;
     642             : 
     643             : /* fd_wksp_private_{checkpt,restore,printf}_v1 provide the v1
     644             :    implementations of {checkpt,restore,printf}.  That is, checkpt_v1
     645             :    will only write a v1 style checkpt while the {restore,printt}_v1 can
     646             :    assume that the path exclusively contains a v1 style checkpt.  These
     647             :    can assume that the input arguments have been validated by their
     648             :    caller.  The printf implementation can further assume verbose is
     649             :    positive and the verbose 0 information has already been printed.  For
     650             :    checkpt/restore, if tpool is non-NULL, the operation will be
     651             :    parallelized over tpool threads [t0,t1).  Assumes the caller is
     652             :    thread t0 and threads (t0,t1) are available for thread dispatch. */
     653             : 
     654             : int
     655             : fd_wksp_private_checkpt_v1( fd_tpool_t * tpool,
     656             :                             ulong        t0,
     657             :                             ulong        t1,
     658             :                             fd_wksp_t *  wksp,
     659             :                             char const * path,
     660             :                             ulong        mode,
     661             :                             char const * uinfo );
     662             : 
     663             : int
     664             : fd_wksp_private_restore_v1( fd_tpool_t * tpool,
     665             :                             ulong        t0,
     666             :                             ulong        t1,
     667             :                             fd_wksp_t *  wksp,
     668             :                             char const * path,
     669             :                             uint         new_seed );
     670             : 
     671             : int
     672             : fd_wksp_private_printf_v1( int          fd,
     673             :                            char const * path,
     674             :                            int          verbose );
     675             : 
     676             : /* Similarly for v2.  Note that style==FD_WKSP_CHECKPT_STYLE_V3 in the
     677             :    fd_wksp_checkpt function becomes a FD_WKSP_CHECKPT_STYLE_V2 with a
     678             :    FD_CHECKPT_FRAME_STYLE_LZ4 cgroup frames in the checkpt itself. */
     679             : 
     680             : int
     681             : fd_wksp_private_checkpt_v2( fd_tpool_t * tpool,
     682             :                             ulong        t0,
     683             :                             ulong        t1,
     684             :                             fd_wksp_t *  wksp,
     685             :                             char const * path,
     686             :                             ulong        mode,
     687             :                             char const * uinfo,
     688             :                             int          frame_style_compresed );
     689             : 
     690             : int
     691             : fd_wksp_private_restore_v2( fd_tpool_t * tpool,
     692             :                             ulong        t0,
     693             :                             ulong        t1,
     694             :                             fd_wksp_t *  wksp,
     695             :                             char const * path,
     696             :                             uint         new_seed );
     697             : 
     698             : int
     699             : fd_wksp_private_printf_v2( int          fd,
     700             :                            char const * path,
     701             :                            int          verbose );
     702             : 
     703             : FD_PROTOTYPES_END
     704             : 
     705             : #endif /* HEADER_fd_src_util_wksp_fd_wksp_private_h */

Generated by: LCOV version 1.14