LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_admin.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 377 433 87.1 %
Date: 2025-03-20 12:08:36 Functions: 16 18 88.9 %

          Line data    Source code
       1             : #include "fd_wksp_private.h"
       2             : #include <sys/mman.h>
       3             : #include <errno.h>
       4             : 
       5             : int
       6   545652576 : fd_wksp_private_lock( fd_wksp_t * wksp ) {
       7             : # if FD_WKSP_LOCK_RECLAIM
       8             :   int   warning = 0;
       9             : #endif
      10   545652576 :   ulong me      = fd_log_group_id();
      11             : 
      12   545652576 :   ulong * _owner = &wksp->owner;
      13   545652576 :   for(;;) {
      14             : 
      15             :     /* Note that we emulate CAS on platforms without FD_HAS_ATOMIC
      16             :        to minimize the amount of code differences we have to test.  On
      17             :        platforms without FD_HAS_ATOMIC, a workspace should not be used
      18             :        concurrently though. */
      19             : 
      20   545652576 :     FD_COMPILER_MFENCE();
      21   545652576 : #   if FD_HAS_ATOMIC
      22   545652576 :     ulong pid = FD_ATOMIC_CAS( _owner, ULONG_MAX, me );
      23             : #   else
      24             :     ulong pid = FD_VOLATILE_CONST( *_owner );
      25             :     if( pid==ULONG_MAX ) FD_VOLATILE( *_owner ) = me;
      26             : #   endif
      27   545652576 :     FD_COMPILER_MFENCE();
      28             : 
      29   545652576 :     if( FD_LIKELY( pid==ULONG_MAX ) ) return FD_WKSP_SUCCESS;
      30             : 
      31             : # if FD_WKSP_LOCK_RECLAIM
      32             :     int status = fd_log_group_id_query( pid );
      33             :     if( FD_UNLIKELY( status==FD_LOG_GROUP_ID_QUERY_DEAD ) ) { /* A process died while holding the lock, try to recover the lock */
      34             : 
      35             :       FD_COMPILER_MFENCE();
      36             : #     if FD_HAS_ATOMIC
      37             :       ulong cur = FD_ATOMIC_CAS( _owner, pid, me );
      38             : #     else
      39             :       ulong cur = FD_VOLATILE_CONST( *_owner );
      40             :       if( cur==pid ) FD_VOLATILE( *_owner ) = me;
      41             : #     endif
      42             :       FD_COMPILER_MFENCE();
      43             : 
      44             :       if( FD_LIKELY( cur==pid ) ) { /* We recovered the lock from the dead pid, try to fix up incomplete ops */
      45             : 
      46             :         FD_LOG_WARNING(( "Process %lu died in an operation on wksp %s; verifying", pid, wksp->name ));
      47             :         if( FD_LIKELY( !fd_wksp_verify( wksp ) ) ) { /* logs details of issues detected */
      48             :           FD_LOG_NOTICE(( "wksp verified" ));
      49             :           return FD_WKSP_SUCCESS;
      50             :         }
      51             : 
      52             :         FD_LOG_WARNING(( "Issues detected; rebuilding" ));
      53             :         if( FD_UNLIKELY( fd_wksp_rebuild( wksp, wksp->seed ) ) ) { /* Rebuild failed (logs details of issues detected) */
      54             :           /* Return control of the lock to the previous owner */
      55             :           FD_COMPILER_MFENCE();
      56             :           FD_VOLATILE( *_owner ) = pid;
      57             :           FD_COMPILER_MFENCE();
      58             :           FD_LOG_WARNING(( "corrupt wksp detected" ));
      59             :           return FD_WKSP_ERR_CORRUPT;
      60             :         }
      61             : 
      62             :         FD_LOG_NOTICE(( "wksp rebuilt" ));
      63             :         return FD_WKSP_SUCCESS;
      64             : 
      65             :       }
      66             : 
      67             :       /* Somebody beat us to recovering the lock ... try again */
      68             : 
      69             :     } else if( FD_UNLIKELY( status!=FD_LOG_GROUP_ID_QUERY_LIVE ) ) { /* Unclear pid status ... issue a warning and try again */
      70             : 
      71             :       if( FD_UNLIKELY( !warning ) ) {
      72             :         FD_LOG_WARNING(( "wksp %s is owned by unknown pid %li; attempting to recover", wksp->name, pid ));
      73             :         warning = 1;
      74             :       }
      75             : 
      76             :     }
      77             : 
      78             :     /* At this point, either another thread in this process has the
      79             :        lock, another active thread in another process has the lock,
      80             :        another unknown status thread in other process has the lock or
      81             :        another thread beat us to reclaim the lock from a dead process.
      82             :        In any case, we don't have the lock.  Wait a while to limit O/S
      83             :        contention and try again. */
      84             : 
      85             :     FD_YIELD();
      86             : # else
      87             : 
      88             :     /* If we are running without FD_WKSP_LOCK_RECLAIM then it is assumed
      89             :        that the contention is caused by a tile pinned to another core,
      90             :        and that this core is itself pinned so spin locking is best. */
      91           0 :     FD_SPIN_PAUSE();
      92             : 
      93           0 : #endif
      94           0 :   }
      95             : 
      96             :   /* never get here */
      97   545652576 : }
      98             : 
      99             : /* Public APIs ********************************************************/
     100             : 
     101             : ulong
     102             : fd_wksp_part_max_est( ulong footprint,
     103         135 :                       ulong sz_typical ) {
     104         135 :   footprint       = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     105         135 :   ulong data_end  = footprint - 1UL;
     106         135 :   ulong pinfo_off = fd_wksp_private_pinfo_off();
     107         135 :   ulong consumed  = sizeof(fd_wksp_private_pinfo_t) + sz_typical;
     108         135 :   ulong part_max  = (data_end - pinfo_off) / (consumed + (ulong)!consumed); /* avoid div-by-zero */
     109         135 :   if( FD_UNLIKELY( (!footprint) | (!sz_typical) | (sz_typical>consumed) | (pinfo_off>data_end) ) ) return 0UL;
     110         126 :   return fd_ulong_min( part_max, FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     111         135 : }
     112             : 
     113             : ulong
     114             : fd_wksp_data_max_est( ulong footprint,
     115         144 :                       ulong part_max ) {
     116         144 :   footprint = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     117         144 :   ulong data_end  = footprint - 1UL;
     118         144 :   ulong data_off  = fd_wksp_private_data_off( part_max );
     119         144 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) |
     120         144 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* covered above */
     121         144 :                    (!footprint) | (data_off>=data_end) ) ) return 0UL;
     122         126 :   return data_end - data_off;
     123         144 : }
     124             : 
     125             : ulong
     126           3 : fd_wksp_align( void ) {
     127           3 :   return FD_WKSP_ALIGN;
     128           3 : }
     129             : 
     130             : ulong
     131             : fd_wksp_footprint( ulong part_max,
     132        1596 :                    ulong data_max ) {
     133        1596 :   ulong data_off = fd_wksp_private_data_off( part_max );
     134        1596 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) | (!data_max) |
     135        1596 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* Covered above */
     136        1596 :                    (data_max > (ULONG_MAX - FD_WKSP_ALIGN + 1UL - data_off - 1UL)                         ) ) ) return 0UL;
     137        1527 :   return fd_ulong_align_up( data_off + data_max + 1UL, FD_WKSP_ALIGN );
     138        1596 : }
     139             : 
     140             : void *
     141             : fd_wksp_new( void *       shmem,
     142             :              char const * name,
     143             :              uint         seed,
     144             :              ulong        part_max,
     145         129 :              ulong        data_max ) {
     146         129 :   fd_wksp_t * wksp = (fd_wksp_t *)shmem;
     147             : 
     148         129 :   if( FD_UNLIKELY( !wksp ) ) {
     149           3 :     FD_LOG_WARNING(( "NULL shmem" ));
     150           3 :     return NULL;
     151           3 :   }
     152             : 
     153         126 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     154           3 :     FD_LOG_WARNING(( "bad align" ));
     155           3 :     return NULL;
     156           3 :   }
     157             : 
     158         123 :   ulong name_len = fd_shmem_name_len( name );
     159         123 :   if( FD_UNLIKELY( !name_len ) ) {
     160           3 :     FD_LOG_WARNING(( "bad name" ));
     161           3 :     return NULL;
     162           3 :   }
     163             : 
     164         120 :   ulong footprint = fd_wksp_footprint( part_max, data_max );
     165         120 :   if( FD_UNLIKELY( !footprint ) ) {
     166          12 :     FD_LOG_WARNING(( "bad part_max and/or data_max" ));
     167          12 :     return NULL;
     168          12 :   }
     169             : 
     170         108 :   fd_memset( wksp, 0, fd_wksp_footprint( part_max, 1UL ) );
     171             : 
     172         108 :   wksp->part_max       = part_max;
     173         108 :   wksp->data_max       = data_max;
     174         108 :   wksp->gaddr_lo       = fd_wksp_private_data_off( part_max );
     175         108 :   wksp->gaddr_hi       = wksp->gaddr_lo + data_max;
     176         108 :   fd_memcpy( wksp->name, name, name_len+1UL );
     177         108 :   wksp->seed           = seed;
     178         108 :   wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     179         108 :   wksp->part_head_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     180         108 :   wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     181         108 :   wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     182         108 :   wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     183         108 :   wksp->cycle_tag      = 4UL;  /* Verify uses tags 0-3 */
     184         108 :   wksp->owner          = 0UL;  /* Mark as locked and in construction */
     185             : 
     186             :   /* Note that wksp->owner was set to zero above, "locking" the wksp by
     187             :      group_id 0.  And the memset above set all the partition tags to
     188             :      zero such that there are no allocated partitions.  So once we set
     189             :      magic below, we can finish the initialization by rebuilding and
     190             :      unlocking.  Since fd_log_group_id is non-zero, the zero owner
     191             :      indicates to any remote observer of the shared memory region that
     192             :      the wksp is being built for the first time. */
     193             : 
     194         108 :   FD_COMPILER_MFENCE();
     195         108 :   FD_VOLATILE( wksp->magic ) = FD_WKSP_MAGIC;
     196         108 :   FD_COMPILER_MFENCE();
     197             : 
     198         108 :   int err = fd_wksp_rebuild( wksp, seed );
     199         108 :   if( FD_UNLIKELY( err ) ) { /* Should be impossible at this point */
     200             : 
     201           0 :     FD_COMPILER_MFENCE();
     202           0 :     FD_VOLATILE( wksp->magic ) = 0UL;
     203           0 :     FD_COMPILER_MFENCE();
     204             : 
     205           0 :     FD_LOG_WARNING(( "fd_wksp_rebuild failed (%i-%s)", err, fd_wksp_strerror( err ) ));
     206           0 :     return NULL;
     207           0 :   }
     208             : 
     209             :   #if FD_HAS_DEEPASAN
     210             :   /* Poison entire workspace except wksp header and the pinfo array. */
     211             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     212             :   fd_asan_poison( wksp_data, footprint - fd_wksp_private_pinfo_off() );
     213             :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     214             :   fd_asan_unpoison( pinfo, FD_WKSP_PRIVATE_PINFO_FOOTPRINT * part_max );
     215             :   #endif
     216             : 
     217         108 :   fd_wksp_private_unlock( wksp );
     218             : 
     219         108 :   return wksp;
     220         108 : }
     221             : 
     222             : fd_wksp_t *
     223        1923 : fd_wksp_join( void * shwksp ) {
     224        1923 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     225             : 
     226        1923 :   if( FD_UNLIKELY( !wksp ) ) {
     227           3 :     FD_LOG_WARNING(( "NULL shwksp" ));
     228           3 :     return NULL;
     229           3 :   }
     230             : 
     231        1920 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     232           3 :     FD_LOG_WARNING(( "bad align" ));
     233           3 :     return NULL;
     234           3 :   }
     235             : 
     236        1917 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     237           3 :     FD_LOG_WARNING(( "bad magic" ));
     238           3 :     return NULL;
     239           3 :   }
     240             : 
     241        1914 :   return wksp;
     242        1917 : }
     243             : 
     244             : void *
     245        1815 : fd_wksp_leave( fd_wksp_t * wksp ) {
     246        1815 :   if( FD_UNLIKELY( !wksp ) ) {
     247           3 :     FD_LOG_WARNING(( "NULL wksp" ));
     248           3 :     return NULL;
     249           3 :   }
     250             : 
     251        1812 :   return (void *)wksp;
     252        1815 : }
     253             : 
     254             : void *
     255         105 : fd_wksp_delete( void * shwksp ) {
     256         105 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     257             : 
     258         105 :   if( FD_UNLIKELY( !wksp ) ) {
     259           3 :     FD_LOG_WARNING(( "NULL shwksp" ));
     260           3 :     return NULL;
     261           3 :   }
     262             : 
     263         102 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     264           3 :     FD_LOG_WARNING(( "bad align" ));
     265           3 :     return NULL;
     266           3 :   }
     267             : 
     268          99 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     269           3 :     FD_LOG_WARNING(( "bad magic" ));
     270           3 :     return NULL;
     271           3 :   }
     272             : 
     273             :   /* TODO: consider testing owner */
     274             : 
     275          96 :   FD_COMPILER_MFENCE();
     276          96 :   FD_VOLATILE( wksp->magic ) = 0UL;
     277          96 :   FD_COMPILER_MFENCE();
     278             : 
     279             : # if FD_HAS_DEEPASAN
     280             :   /* Unpoison entire wksp region. */
     281             :   ulong footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     282             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     283             :   fd_asan_unpoison( wksp_data, footprint - fd_wksp_private_pinfo_off());
     284             : # endif
     285             : 
     286          96 :   return wksp;
     287          99 : }
     288             : 
     289           3 : char const * fd_wksp_name    ( fd_wksp_t const * wksp ) { return wksp->name;     }
     290         111 : uint         fd_wksp_seed    ( fd_wksp_t const * wksp ) { return wksp->seed;     }
     291           3 : ulong        fd_wksp_part_max( fd_wksp_t const * wksp ) { return wksp->part_max; }
     292           3 : ulong        fd_wksp_data_max( fd_wksp_t const * wksp ) { return wksp->data_max; }
     293             : 
     294             : ulong
     295           0 : fd_wksp_owner( fd_wksp_t const * wksp ) {
     296           0 :   FD_COMPILER_MFENCE();
     297           0 :   ulong owner = FD_VOLATILE_CONST( wksp->owner );
     298           0 :   FD_COMPILER_MFENCE();
     299           0 :   return owner;
     300           0 : }
     301             : 
     302             : char const *
     303          48 : fd_wksp_strerror( int err ) {
     304          48 :   switch( err ) {
     305           3 :   case FD_WKSP_SUCCESS:     return "success";
     306          12 :   case FD_WKSP_ERR_INVAL:   return "inval";
     307          30 :   case FD_WKSP_ERR_FAIL:    return "fail";
     308           3 :   case FD_WKSP_ERR_CORRUPT: return "corrupt";
     309           0 :   default: break;
     310          48 :   }
     311           0 :   return "unknown";
     312          48 : }
     313             : 
     314             : int
     315          51 : fd_wksp_verify( fd_wksp_t * wksp ) {
     316             : 
     317      430491 : # define TEST(c) do {                                                                             \
     318      430491 :     if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_WKSP_ERR_CORRUPT; } \
     319      430491 :   } while(0)
     320             : 
     321             :   /* Validate metadata */
     322             : 
     323          51 :   TEST( wksp );
     324          51 :   TEST( wksp->magic==FD_WKSP_MAGIC );
     325             : 
     326          51 :   ulong part_max = wksp->part_max;
     327          51 :   ulong data_max = wksp->data_max;
     328          51 :   TEST( fd_wksp_footprint( part_max, data_max ) );
     329             : 
     330          51 :   ulong gaddr_lo = wksp->gaddr_lo; TEST( gaddr_lo==fd_wksp_private_data_off( part_max ) );
     331          51 :   ulong gaddr_hi = wksp->gaddr_hi; TEST( gaddr_hi==gaddr_lo+data_max                    );
     332             : 
     333          51 :   TEST( fd_shmem_name_len( wksp->name ) );
     334             : 
     335             :   /* seed is arbitrary */
     336             : 
     337          51 :   TEST( wksp->cycle_tag >= 4UL );
     338             : 
     339             :   /* TODO: consider verifying owner */
     340             : 
     341          51 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     342             : 
     343             :   /* Clear out cycle tags */
     344             : 
     345      201381 :   for( ulong i=0UL; i<part_max; i++ ) pinfo[ i ].cycle_tag = 0UL;
     346             : 
     347             :   /* Verify the idle stack */
     348             : 
     349          51 :   ulong idle_cnt = 0UL;
     350             : 
     351          51 :   do {
     352          51 :     ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
     353      199071 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     354             : 
     355             :       /* Visit i.  Note that i has not been validated yet. */
     356             : 
     357      199020 :       TEST( i<part_max            ); /* Validate i */
     358      199020 :       TEST( !pinfo[ i ].cycle_tag ); /* Make sure not visited before */
     359      199020 :       pinfo[ i ].cycle_tag = 1UL;    /* Mark as visited in idle stack */
     360      199020 :       idle_cnt++;                    /* Update the idle cnt */
     361             : 
     362             :       /* Advance to the next idle */
     363             : 
     364      199020 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx );
     365      199020 :     }
     366          51 :   } while(0);
     367             : 
     368             :   /* Idle stack looks intact, verify partitioning */
     369             : 
     370          51 :   ulong free_cnt = 0UL;
     371          51 :   ulong used_cnt = 0UL;
     372             : 
     373          51 :   do {
     374          51 :     ulong j = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     375          51 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_head_cidx );
     376          51 :     ulong g = gaddr_lo;
     377             : 
     378          51 :     int last_free = 0;
     379             : 
     380        2361 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     381             : 
     382             :       /* At this point, we last visited j.  Visit i.  Note that j has
     383             :          been validated but i has not. */
     384             : 
     385        2310 :       TEST( i<part_max                                           ); /* Validate i */
     386        2310 :       TEST( pinfo[ i ].gaddr_lo==g                               ); /* Make sure partition is tightly adjacent to previous */
     387        2310 :       TEST( pinfo[ i ].gaddr_hi> g                               ); /* Make sure partition size is non-zero */
     388        2310 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].prev_cidx )==j ); /* Make sure correct prev partition */
     389        2310 :       TEST( !pinfo[ i ].cycle_tag                                ); /* Make sure not visited before */
     390        2310 :       pinfo[ i ].cycle_tag = 2UL;                                   /* Mark as visited in partitioning */
     391             : 
     392        2310 :       g = pinfo[ i ].gaddr_hi;                                      /* Extract where the next partition should start */
     393        2310 :       int is_free = !pinfo[ i ].tag;                                /* Determine if this partition is free or used */
     394        2310 :       TEST( !(last_free & is_free) );                               /* Make sure no adjacent free partitions */
     395        2310 :       free_cnt += (ulong) is_free;                                  /* Update the free cnt */
     396        2310 :       used_cnt += (ulong)!is_free;                                  /* Update the used cnt */
     397             : 
     398             :       /* Advance to the next partition */
     399             : 
     400        2310 :       last_free = is_free;
     401             : 
     402        2310 :       j = i;
     403        2310 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].next_cidx );
     404        2310 :     }
     405             : 
     406          51 :     TEST( fd_wksp_private_pinfo_idx( wksp->part_tail_cidx )==j ); /* Make sure correct partition tail */
     407          51 :     TEST( g==gaddr_hi );                                          /* Make sure complete partitioning */
     408          51 :     TEST( (idle_cnt + free_cnt + used_cnt)==part_max );           /* Make sure no lost idle partitions */
     409          51 :   } while(0);
     410             : 
     411             :   /* Idle stack and partitioning look intact, validate used treap */
     412             : 
     413          51 :   do {
     414          51 :     ulong visit_cnt = 0UL;
     415             : 
     416          51 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     417          51 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     418          51 :     ulong g = gaddr_lo;
     419             : 
     420          51 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     421          39 :       TEST( i<part_max );                                             /* Validate i */
     422          39 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     423          39 :     }
     424             : 
     425        2691 :     for(;;) {
     426             : 
     427             :       /* At this point i is and everything on stack is validated */
     428             : 
     429        2691 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     430        1371 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     431             : 
     432             :         /* Pop stack */
     433             : 
     434        1320 :         i = s;
     435        1320 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     436             : 
     437             :         /* Visit i */
     438             : 
     439        1320 :         ulong p = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     440             : 
     441        1320 :         TEST( pinfo[ i ].gaddr_lo>=g    );            /* Make sure this starts after last visited */
     442        1320 :         TEST( pinfo[ i ].tag            );            /* Make sure tagged as a used partition */
     443        1320 :         TEST( pinfo[ i ].cycle_tag==2UL );            /* Make sure in partitioning and not visited yet this traversal */
     444        1320 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) /* Make sure heap property satisfied */
     445        1281 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     446             : 
     447        1320 :         TEST( !pinfo[ i ].in_same );                  /* Make sure unique */
     448        1320 :         TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx ) ) ); /* " */
     449             : 
     450        1320 :         pinfo[ i ].cycle_tag = 3UL;                   /* Mark as visited this traversal */
     451        1320 :         visit_cnt++;                                  /* Update the visit cnt */
     452        1320 :         g = pinfo[ i ].gaddr_hi;                      /* Get minimum start for next partition */
     453             : 
     454             :         /* Traverse the right subtree */
     455             : 
     456        1320 :         p = i;
     457        1320 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     458        1320 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     459         672 :           TEST( i<part_max );                                             /* Validate i */
     460         672 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     461         672 :         }
     462             : 
     463        1320 :       } else {
     464             : 
     465             :         /* At this point i and everything on the stack is validated.
     466             :            Push i to the stack and recurse on the left subtree. */
     467             : 
     468        1320 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     469        1320 :         s = i;
     470        1320 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     471        1320 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     472         609 :           TEST( i<part_max );                                             /* Validate i */
     473         609 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     474         609 :         }
     475             : 
     476        1320 :       }
     477        2691 :     }
     478             : 
     479          51 :     TEST( visit_cnt==used_cnt ); /* Make sure all used partitions in used treap */
     480          51 :   } while(0);
     481             : 
     482             :   /* Idle stack, partitioning and used treap look intact, validate the
     483             :      free treap. */
     484             : 
     485          51 :   do {
     486          51 :     ulong visit_cnt = 0UL;
     487             : 
     488          51 :     ulong i  = fd_wksp_private_pinfo_idx( wksp->part_free_cidx );
     489          51 :     ulong s  = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     490          51 :     ulong sz = 0UL;
     491             : 
     492          51 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     493          51 :       TEST( i<part_max );                                             /* Validate i */
     494          51 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     495          51 :     }
     496             : 
     497        1209 :     for(;;) {
     498             : 
     499             :       /* At this point i and everything on the stack is validated */
     500             : 
     501        1209 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     502         630 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     503             : 
     504             :         /* Pop stack */
     505             : 
     506         579 :         i = s;
     507         579 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     508             : 
     509             :         /* Visit i */
     510             : 
     511         579 :         ulong p   = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     512         579 :         ulong isz = fd_wksp_private_pinfo_sz( pinfo + i );               /* Extract the size */
     513             : 
     514         579 :         TEST( isz>sz              ); /* Make sure this partition i larger than previous */
     515         579 :         TEST( !pinfo[ i ].tag     ); /* Make sure tagged as a free partition */
     516         579 :         TEST( !pinfo[ i ].in_same ); /* Make sure marked as not in same */
     517             : 
     518         579 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) { /* Make sure heap property satisfied */
     519         528 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     520         528 :         }
     521             : 
     522         579 :         sz = isz; /* Update largest size partition seen so far */
     523             : 
     524             :         /* Traverse all same sized partitions */
     525             : 
     526         579 :         ulong j = i;
     527         990 :         for(;;) {
     528             : 
     529             :           /* At this point, j is validated */
     530             : 
     531         990 :           TEST( pinfo[ j ].cycle_tag==2UL ); /* Make sure in partitioning and not visited yet this traversal */
     532         990 :           pinfo[ j ].cycle_tag = 3UL;        /* Mark as visited this traversal */
     533         990 :           visit_cnt++;
     534             : 
     535         990 :           ulong k = fd_wksp_private_pinfo_idx( pinfo[ j ].same_cidx );    /* Get the next same sized */
     536         990 :           if( fd_wksp_private_pinfo_idx_is_null( k ) ) break;             /* If no more, we are done with this node */
     537         411 :           TEST( k<part_max );                                             /* Make sure valid index */
     538         411 :           TEST( fd_wksp_private_pinfo_sz( pinfo + k )==sz );              /* Make sure same size */
     539         411 :           TEST( pinfo[ k ].in_same );                                     /* Make sure marked as in same */
     540         411 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].left_cidx  ) ) );
     541         411 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].right_cidx ) ) );
     542         411 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ k ].parent_cidx )==j ); /* Make sure correct parent */
     543         411 :           j = k;
     544         411 :         }
     545             : 
     546             :         /* Recurse on the right subtree */
     547             : 
     548         579 :         p = i;
     549         579 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     550         579 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     551         270 :           TEST( i<part_max );                                             /* Validate i */
     552         270 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     553         270 :         }
     554             : 
     555         579 :       } else {
     556             : 
     557         579 :         TEST( i<part_max ); /* Validate i */
     558             : 
     559             :         /* At this point i and everything on the stack is validated.
     560             :            Push i to the stack and recurse on the left subtree. */
     561             : 
     562         579 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     563         579 :         s = i;
     564         579 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     565         579 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     566         258 :           TEST( i<part_max );                                             /* Validate i */
     567         258 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     568         258 :         }
     569         579 :       }
     570        1209 :     }
     571             : 
     572          51 :     TEST( visit_cnt==free_cnt ); /* Make sure all free partitions in free treap */
     573             : 
     574          51 :   } while(0);
     575             : 
     576          51 : # undef TEST
     577             : 
     578          51 :   return FD_WKSP_SUCCESS;
     579          51 : }
     580             : 
     581             : int
     582             : fd_wksp_rebuild( fd_wksp_t * wksp,
     583         792 :                  uint        seed ) {
     584             : 
     585             :   /* Load the wksp metadata, don't rebuild if any of it looks even
     586             :      slightly off. */
     587             : 
     588         792 :   if( FD_UNLIKELY( !wksp ) ) {
     589           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     590           0 :     return FD_WKSP_ERR_CORRUPT;
     591           0 :   }
     592             : 
     593         792 :   ulong magic     = wksp->magic;
     594         792 :   ulong part_max  = wksp->part_max;
     595         792 :   ulong data_max  = wksp->data_max;
     596         792 :   ulong gaddr_lo  = wksp->gaddr_lo;
     597         792 :   ulong gaddr_hi  = wksp->gaddr_hi;
     598         792 :   ulong cycle_tag = wksp->cycle_tag;
     599             : 
     600             :   /* TODO: consider verifying owner */
     601             : 
     602         792 :   ulong footprint    = fd_wksp_footprint( part_max, data_max );
     603         792 :   ulong gaddr_lo_exp = fd_wksp_private_data_off( part_max );
     604         792 :   ulong gaddr_hi_exp = gaddr_lo_exp + data_max;
     605         792 :   if( FD_UNLIKELY( (magic!=FD_WKSP_MAGIC) | (!footprint) | (!fd_shmem_name_len( wksp->name )) |
     606         792 :                    (gaddr_lo!=gaddr_lo_exp) | (gaddr_hi!=gaddr_hi_exp) | (cycle_tag<4UL) ) ) {
     607           0 :     FD_LOG_WARNING(( "bad metadata\n\t"
     608           0 :                      "magic     %016lx (exp %016lx)\n\t"
     609           0 :                      "part_max  %lu data_max %lu (footprint %lu)\n\t"
     610           0 :                      "gaddr_lo  %lu (exp %lu)\n\t"
     611           0 :                      "gaddr_hi  %lu (exp %lu)\n\t"
     612           0 :                      "cycle_tag %lu (exp>=4)",
     613           0 :                      magic, FD_WKSP_MAGIC, part_max, data_max, footprint,
     614           0 :                      gaddr_lo, gaddr_lo_exp, gaddr_hi, gaddr_hi_exp, cycle_tag ));
     615           0 :     return FD_WKSP_ERR_CORRUPT;
     616           0 :   }
     617             : 
     618             :   /* Scan the wksp pinfo and insert any used partitions into the used
     619             :      treap and put the rest on the idle stack.  If there is any sign of
     620             :      corruption (empty, bad range or overlap between used partitions),
     621             :      we abort the rebuild (this is almost certainly data corruption of
     622             :      some form and we don't have enough info to resolve a conflict
     623             :      without potentially making the situation worse).  We do the scan in
     624             :      reverse order to rebuild the idle stack in forward order.
     625             : 
     626             :      Note that we don't ever change the gaddr_lo,gaddr_hi of any tagged
     627             :      partitions such that operation is guaranteed to never change the
     628             :      single source of truth.  As such, this operation can be interrupted
     629             :      and restarted arbitrarily safely.*/
     630             : 
     631         792 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     632             : 
     633         792 :   do {
     634         792 :     wksp->seed           = seed;
     635         792 :     wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush idle stack */
     636         792 :     wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush used treap */
     637         792 :     wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush free treap */
     638             : 
     639         792 :     ulong i = part_max;
     640     4931139 :     while( i ) {
     641     4930347 :       i--;
     642             : 
     643             :       /* Ideally, heap priorities should just be a shuffling of the
     644             :          integers [0,part_max).  fd_uint_hash will generate such a
     645             :          shuffling for part_max = 2^32.  Using the lower 30 bits
     646             :          (reserving bit 31 for bulk operations) will yield something
     647             :          very close.  We use seed to mix it up some more. */
     648             : 
     649     4930347 :       pinfo[ i ].in_same    = 0U;
     650     4930347 :       pinfo[ i ].heap_prio  = fd_uint_hash( seed ^ (uint)i ) & ((1U<<30)-1U);
     651     4930347 :       pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     652     4930347 :       pinfo[ i ].cycle_tag  = 0U;
     653             : 
     654     4930347 :       ulong tag = pinfo[ i ].tag;
     655     4930347 :       if( !tag ) { /* Not used ... make it available for reuse below */
     656     4921479 :         fd_wksp_private_idle_stack_push( i, wksp, pinfo );
     657     4921479 :         continue;
     658     4921479 :       }
     659             : 
     660        8868 :       pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     661        8868 :       pinfo[ i ].next_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     662             : 
     663        8868 :       if( FD_UNLIKELY( fd_wksp_private_used_treap_insert( i, wksp, pinfo ) ) ) return FD_WKSP_ERR_CORRUPT; /* Logs details */
     664        8868 :     }
     665         792 :   } while(0);
     666             : 
     667             :   /* At this point, a partition is either in the idle stack or used
     668             :      treap.  Further, we have:
     669             : 
     670             :                  | used                       | idle
     671             :        ----------+----------------------------+--------
     672             :        gaddr_*   | non-empty range            | 0
     673             :                  | no overlap with other used | 0
     674             :        tag       | non-zero                   | 0
     675             :        in_same   | 0                          | 0
     676             :        heap_prio | randomized                 | randomized
     677             :        prev      | NULL                       | NULL
     678             :        next      | NULL                       | NULL
     679             :        left      | used treap managed         | NULL
     680             :        right     | used treap managed         | NULL
     681             :        same      | used treap managed (NULL)  | NULL
     682             :        parent    | used treap managed         | idle stack managed
     683             :        stack     | wksp managed               | wksp managed
     684             :        cycle_tag | wksp managed               | wksp managed
     685             : 
     686             :      In-order traverse the used treap to rebuild the partitioning and
     687             :      the free treap. */
     688             : 
     689         792 :   do {
     690         792 :     uint * j_next_cidx_ptr = &wksp->part_head_cidx;  /* Location of most recently added partition next link */
     691             : 
     692         792 :     ulong  j  = FD_WKSP_PRIVATE_PINFO_IDX_NULL; /* Most recently added partition */
     693         792 :     ulong  g0 = gaddr_lo;                       /* Most recently added partition end */
     694             : 
     695         792 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     696         792 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     697       18528 :     for(;;) {
     698       18528 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     699        9660 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     700             : 
     701             :         /* Pop traversal stack */
     702             : 
     703        8868 :         i = s;
     704        8868 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     705             : 
     706             :         /* Visit i */
     707             : 
     708        8868 :         ulong g1 = pinfo[ i ].gaddr_lo;
     709        8868 :         if( g1 > g0 ) { /* There's a gap between i and the most recently added partition */
     710             : 
     711             :           /* Acquire an idle partition to hold the gap */
     712             : 
     713        6231 :           if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     714           0 :             FD_LOG_WARNING(( "part_max (%lu) too small to fill gap before partition %lu (tag %lu gaddr_lo %lu gaddr_hi %lu)",
     715           0 :                              part_max, i, pinfo[i].tag, pinfo[i].gaddr_lo, pinfo[i].gaddr_hi ));
     716           0 :             return FD_WKSP_ERR_CORRUPT;
     717           0 :           }
     718        6231 :           ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     719             : 
     720             :           /* Populate the acquired partition with the gap details,
     721             :              append it to the wksp partitioning and insert it into the
     722             :              free treap.  Note that stack_push/pop reset gaddr_lo,
     723             :              gaddr_hi, tag, in_same, {prev, next, left, right, same,
     724             :              parent}_cidx.  It preserved heap_prio from its original
     725             :              assignment and didn't touch stack_cidx or cycle_tag. */
     726             : 
     727        6231 :           pinfo[ k ].gaddr_lo  = g0;
     728        6231 :           pinfo[ k ].gaddr_hi  = g1;
     729        6231 :           pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     730        6231 :           *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     731        6231 :           j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     732        6231 :           j  = k;
     733        6231 :           g0 = g1;
     734             : 
     735        6231 :           fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     736        6231 :         }
     737             : 
     738             :         /* Add i to the partitioning. */
     739             : 
     740        8868 :         pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     741        8868 :         *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( i );
     742        8868 :         j_next_cidx_ptr  = &pinfo[ i ].next_cidx;
     743        8868 :         j  = i;
     744        8868 :         g0 = pinfo[ i ].gaddr_hi;
     745             : 
     746             :         /* Traverse the right subtree */
     747             : 
     748        8868 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     749             : 
     750        8868 :       } else {
     751             : 
     752             :         /* Push i to the stack and recurse on the left subtree. */
     753             : 
     754        8868 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     755        8868 :         s = i;
     756        8868 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     757             : 
     758        8868 :       }
     759       18528 :     }
     760             : 
     761         792 :     if( g0 < gaddr_hi ) { /* Have final gap to fill */
     762             : 
     763             :       /* This works the same as the above */
     764             : 
     765         792 :       if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     766           0 :         FD_LOG_WARNING(( "part_max (%lu) too small to complete partitioning", part_max ));
     767           0 :         return FD_WKSP_ERR_CORRUPT;
     768           0 :       }
     769         792 :       ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     770             : 
     771         792 :       pinfo[ k ].gaddr_lo  = g0;
     772         792 :       pinfo[ k ].gaddr_hi  = gaddr_hi;
     773         792 :       pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     774         792 :       *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     775         792 :       j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     776         792 :       j  = k;
     777             :     //g0 = gaddr_hi;
     778             : 
     779         792 :       fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     780         792 :     }
     781             : 
     782         792 :     wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( j );
     783             : 
     784         792 :   } while(0);
     785             : 
     786         792 :   return FD_WKSP_SUCCESS;
     787         792 : }
     788             : 
     789             : void
     790           0 : fd_wksp_mprotect( fd_wksp_t * wksp, int flag ) {
     791           0 :   if( FD_UNLIKELY( !wksp ) ) {
     792           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     793           0 :     return;
     794           0 :   }
     795             : 
     796           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     797           0 :     FD_LOG_WARNING(( "bad align" ));
     798           0 :     return;
     799           0 :   }
     800             : 
     801           0 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     802           0 :     FD_LOG_WARNING(( "bad magic" ));
     803           0 :     return;
     804           0 :   }
     805             : 
     806           0 :   ulong wksp_footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     807           0 :   if( FD_UNLIKELY( mprotect( (void*)wksp, wksp_footprint, ( flag ? PROT_READ : (PROT_READ|PROT_WRITE) ) ) ) ) {
     808           0 :     FD_LOG_WARNING(( "mprotect failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     809           0 :     return;
     810           0 :   }
     811           0 : }

Generated by: LCOV version 1.14