LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_admin.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 379 416 91.1 %
Date: 2026-06-29 05:51:35 Functions: 18 19 94.7 %

          Line data    Source code
       1             : #include "fd_wksp_private.h"
       2             : 
       3             : int
       4    60391680 : fd_wksp_private_lock( fd_wksp_t * wksp ) {
       5             : # if FD_WKSP_LOCK_RECLAIM
       6             :   int   warning = 0;
       7             : #endif
       8    60391680 :   ulong me      = fd_log_group_id();
       9             : 
      10    60391680 :   ulong * _owner = &wksp->owner;
      11    60391680 :   for(;;) {
      12             : 
      13             :     /* Note that we emulate CAS on platforms without FD_HAS_ATOMIC
      14             :        to minimize the amount of code differences we have to test.  On
      15             :        platforms without FD_HAS_ATOMIC, a workspace should not be used
      16             :        concurrently though. */
      17             : 
      18    60391680 :     FD_COMPILER_MFENCE();
      19    60391680 : #   if FD_HAS_ATOMIC
      20    60391680 :     ulong pid = FD_ATOMIC_CAS( _owner, ULONG_MAX, me );
      21             : #   else
      22             :     ulong pid = FD_VOLATILE_CONST( *_owner );
      23             :     if( pid==ULONG_MAX ) FD_VOLATILE( *_owner ) = me;
      24             : #   endif
      25    60391680 :     FD_COMPILER_MFENCE();
      26             : 
      27    60391680 :     if( FD_LIKELY( pid==ULONG_MAX ) ) return FD_WKSP_SUCCESS;
      28             : 
      29             : # if FD_WKSP_LOCK_RECLAIM
      30             :     int status = fd_log_group_id_query( pid );
      31             :     if( FD_UNLIKELY( status==FD_LOG_GROUP_ID_QUERY_DEAD ) ) { /* A process died while holding the lock, try to recover the lock */
      32             : 
      33             :       FD_COMPILER_MFENCE();
      34             : #     if FD_HAS_ATOMIC
      35             :       ulong cur = FD_ATOMIC_CAS( _owner, pid, me );
      36             : #     else
      37             :       ulong cur = FD_VOLATILE_CONST( *_owner );
      38             :       if( cur==pid ) FD_VOLATILE( *_owner ) = me;
      39             : #     endif
      40             :       FD_COMPILER_MFENCE();
      41             : 
      42             :       if( FD_LIKELY( cur==pid ) ) { /* We recovered the lock from the dead pid, try to fix up incomplete ops */
      43             : 
      44             :         FD_LOG_WARNING(( "Process %lu died in an operation on wksp %s; verifying", pid, wksp->name ));
      45             :         if( FD_LIKELY( !fd_wksp_verify( wksp ) ) ) { /* logs details of issues detected */
      46             :           FD_LOG_NOTICE(( "wksp verified" ));
      47             :           return FD_WKSP_SUCCESS;
      48             :         }
      49             : 
      50             :         FD_LOG_WARNING(( "Issues detected; rebuilding" ));
      51             :         if( FD_UNLIKELY( fd_wksp_rebuild( wksp, wksp->seed ) ) ) { /* Rebuild failed (logs details of issues detected) */
      52             :           /* Return control of the lock to the previous owner */
      53             :           FD_COMPILER_MFENCE();
      54             :           FD_VOLATILE( *_owner ) = pid;
      55             :           FD_COMPILER_MFENCE();
      56             :           FD_LOG_WARNING(( "corrupt wksp detected" ));
      57             :           return FD_WKSP_ERR_CORRUPT;
      58             :         }
      59             : 
      60             :         FD_LOG_NOTICE(( "wksp rebuilt" ));
      61             :         return FD_WKSP_SUCCESS;
      62             : 
      63             :       }
      64             : 
      65             :       /* Somebody beat us to recovering the lock ... try again */
      66             : 
      67             :     } else if( FD_UNLIKELY( status!=FD_LOG_GROUP_ID_QUERY_LIVE ) ) { /* Unclear pid status ... issue a warning and try again */
      68             : 
      69             :       if( FD_UNLIKELY( !warning ) ) {
      70             :         FD_LOG_WARNING(( "wksp %s is owned by unknown pid %li; attempting to recover", wksp->name, pid ));
      71             :         warning = 1;
      72             :       }
      73             : 
      74             :     }
      75             : 
      76             :     /* At this point, either another thread in this process has the
      77             :        lock, another active thread in another process has the lock,
      78             :        another unknown status thread in other process has the lock or
      79             :        another thread beat us to reclaim the lock from a dead process.
      80             :        In any case, we don't have the lock.  Wait a while to limit O/S
      81             :        contention and try again. */
      82             : 
      83             :     FD_YIELD();
      84             : # else
      85             : 
      86             :     /* If we are running without FD_WKSP_LOCK_RECLAIM then it is assumed
      87             :        that the contention is caused by a tile pinned to another core,
      88             :        and that this core is itself pinned so spin locking is best. */
      89           0 :     FD_SPIN_PAUSE();
      90             : 
      91           0 : #endif
      92           0 :   }
      93             : 
      94             :   /* never get here */
      95    60391680 : }
      96             : 
      97             : /* Public APIs ********************************************************/
      98             : 
      99             : ulong
     100             : fd_wksp_part_max_est( ulong footprint,
     101         303 :                       ulong sz_typical ) {
     102         303 :   footprint       = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     103         303 :   ulong data_end  = footprint - 1UL;
     104         303 :   ulong pinfo_off = fd_wksp_private_pinfo_off();
     105         303 :   ulong consumed  = sizeof(fd_wksp_private_pinfo_t) + sz_typical;
     106         303 :   ulong part_max  = (data_end - pinfo_off) / (consumed + (ulong)!consumed); /* avoid div-by-zero */
     107         303 :   if( FD_UNLIKELY( (!footprint) | (!sz_typical) | (sz_typical>consumed) | (pinfo_off>data_end) ) ) return 0UL;
     108         294 :   return fd_ulong_min( part_max, FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     109         303 : }
     110             : 
     111             : ulong
     112             : fd_wksp_data_max_est( ulong footprint,
     113         276 :                       ulong part_max ) {
     114         276 :   footprint = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     115         276 :   ulong data_end  = footprint - 1UL;
     116         276 :   ulong data_off  = fd_wksp_private_data_off( part_max );
     117         276 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) |
     118         276 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* covered above */
     119         276 :                    (!footprint) | (data_off>=data_end) ) ) return 0UL;
     120         258 :   return data_end - data_off;
     121         276 : }
     122             : 
     123             : ulong
     124           3 : fd_wksp_align( void ) {
     125           3 :   return FD_WKSP_ALIGN;
     126           3 : }
     127             : 
     128             : ulong
     129             : fd_wksp_footprint( ulong part_max,
     130        1461 :                    ulong data_max ) {
     131        1461 :   ulong data_off = fd_wksp_private_data_off( part_max );
     132        1461 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) | (!data_max) |
     133        1461 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* Covered above */
     134        1461 :                    (data_max > (ULONG_MAX - FD_WKSP_ALIGN + 1UL - data_off - 1UL)                         ) ) ) return 0UL;
     135        1392 :   return fd_ulong_align_up( data_off + data_max + 1UL, FD_WKSP_ALIGN );
     136        1461 : }
     137             : 
     138             : void *
     139             : fd_wksp_new( void *       shmem,
     140             :              char const * name,
     141             :              uint         seed,
     142             :              ulong        part_max,
     143         261 :              ulong        data_max ) {
     144         261 :   fd_wksp_t * wksp = (fd_wksp_t *)shmem;
     145             : 
     146         261 :   if( FD_UNLIKELY( !wksp ) ) {
     147           3 :     FD_LOG_WARNING(( "NULL shmem" ));
     148           3 :     return NULL;
     149           3 :   }
     150             : 
     151         258 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     152           3 :     FD_LOG_WARNING(( "bad align" ));
     153           3 :     return NULL;
     154           3 :   }
     155             : 
     156         255 :   ulong name_len = fd_shmem_name_len( name );
     157         255 :   if( FD_UNLIKELY( !name_len ) ) {
     158           3 :     FD_LOG_WARNING(( "bad name" ));
     159           3 :     return NULL;
     160           3 :   }
     161             : 
     162         252 :   ulong footprint = fd_wksp_footprint( part_max, data_max );
     163         252 :   if( FD_UNLIKELY( !footprint ) ) {
     164          12 :     FD_LOG_WARNING(( "bad part_max and/or data_max" ));
     165          12 :     return NULL;
     166          12 :   }
     167             : 
     168         240 :   fd_memset( wksp, 0, fd_wksp_footprint( part_max, 1UL ) );
     169             : 
     170         240 :   wksp->part_max       = part_max;
     171         240 :   wksp->data_max       = data_max;
     172         240 :   wksp->gaddr_lo       = fd_wksp_private_data_off( part_max );
     173         240 :   wksp->gaddr_hi       = wksp->gaddr_lo + data_max;
     174         240 :   fd_memcpy( wksp->name, name, name_len+1UL );
     175         240 :   wksp->seed           = seed;
     176         240 :   wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     177         240 :   wksp->part_head_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     178         240 :   wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     179         240 :   wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     180         240 :   wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     181         240 :   wksp->cycle_tag      = 4UL;  /* Verify uses tags 0-3 */
     182         240 :   wksp->owner          = 0UL;  /* Mark as locked and in construction */
     183             : 
     184             :   /* Note that wksp->owner was set to zero above, "locking" the wksp by
     185             :      group_id 0.  And the memset above set all the partition tags to
     186             :      zero such that there are no allocated partitions.  So once we set
     187             :      magic below, we can finish the initialization by rebuilding and
     188             :      unlocking.  Since fd_log_group_id is non-zero, the zero owner
     189             :      indicates to any remote observer of the shared memory region that
     190             :      the wksp is being built for the first time. */
     191             : 
     192         240 :   FD_COMPILER_MFENCE();
     193         240 :   FD_VOLATILE( wksp->magic ) = FD_WKSP_MAGIC;
     194         240 :   FD_COMPILER_MFENCE();
     195             : 
     196         240 :   int err = fd_wksp_rebuild( wksp, seed );
     197         240 :   if( FD_UNLIKELY( err ) ) { /* Should be impossible at this point */
     198             : 
     199           0 :     FD_COMPILER_MFENCE();
     200           0 :     FD_VOLATILE( wksp->magic ) = 0UL;
     201           0 :     FD_COMPILER_MFENCE();
     202             : 
     203           0 :     FD_LOG_WARNING(( "fd_wksp_rebuild failed (%i-%s)", err, fd_wksp_strerror( err ) ));
     204           0 :     return NULL;
     205           0 :   }
     206             : 
     207             :   #if FD_HAS_DEEPASAN
     208             :   /* Poison entire workspace except wksp header and the pinfo array. */
     209             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     210             :   fd_asan_poison( wksp_data, footprint - fd_wksp_private_pinfo_off() );
     211             :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     212             :   fd_asan_unpoison( pinfo, FD_WKSP_PRIVATE_PINFO_FOOTPRINT * part_max );
     213             :   #endif
     214             : 
     215         240 :   fd_wksp_private_unlock( wksp );
     216             : 
     217         240 :   return wksp;
     218         240 : }
     219             : 
     220             : fd_wksp_t *
     221        2052 : fd_wksp_join( void * shwksp ) {
     222        2052 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     223             : 
     224        2052 :   if( FD_UNLIKELY( !wksp ) ) {
     225           3 :     FD_LOG_WARNING(( "NULL shwksp" ));
     226           3 :     return NULL;
     227           3 :   }
     228             : 
     229        2049 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     230           3 :     FD_LOG_WARNING(( "bad align" ));
     231           3 :     return NULL;
     232           3 :   }
     233             : 
     234        2046 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     235           3 :     FD_LOG_WARNING(( "bad magic" ));
     236           3 :     return NULL;
     237           3 :   }
     238             : 
     239        2043 :   return wksp;
     240        2046 : }
     241             : 
     242             : void *
     243        1836 : fd_wksp_leave( fd_wksp_t * wksp ) {
     244        1836 :   if( FD_UNLIKELY( !wksp ) ) {
     245           3 :     FD_LOG_WARNING(( "NULL wksp" ));
     246           3 :     return NULL;
     247           3 :   }
     248             : 
     249        1833 :   return (void *)wksp;
     250        1836 : }
     251             : 
     252             : void *
     253         126 : fd_wksp_delete( void * shwksp ) {
     254         126 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     255             : 
     256         126 :   if( FD_UNLIKELY( !wksp ) ) {
     257           3 :     FD_LOG_WARNING(( "NULL shwksp" ));
     258           3 :     return NULL;
     259           3 :   }
     260             : 
     261         123 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     262           3 :     FD_LOG_WARNING(( "bad align" ));
     263           3 :     return NULL;
     264           3 :   }
     265             : 
     266         120 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     267           3 :     FD_LOG_WARNING(( "bad magic" ));
     268           3 :     return NULL;
     269           3 :   }
     270             : 
     271             :   /* TODO: consider testing owner */
     272             : 
     273         117 :   FD_COMPILER_MFENCE();
     274         117 :   FD_VOLATILE( wksp->magic ) = 0UL;
     275         117 :   FD_COMPILER_MFENCE();
     276             : 
     277             : # if FD_HAS_DEEPASAN
     278             :   /* Unpoison entire wksp region. */
     279             :   ulong footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     280             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     281             :   fd_asan_unpoison( wksp_data, footprint - fd_wksp_private_pinfo_off());
     282             : # endif
     283             : 
     284         117 :   return wksp;
     285         120 : }
     286             : 
     287           3 : char const * fd_wksp_name    ( fd_wksp_t const * wksp ) { return wksp->name;     }
     288         111 : uint         fd_wksp_seed    ( fd_wksp_t const * wksp ) { return wksp->seed;     }
     289           6 : ulong        fd_wksp_part_max( fd_wksp_t const * wksp ) { return wksp->part_max; }
     290           6 : ulong        fd_wksp_data_max( fd_wksp_t const * wksp ) { return wksp->data_max; }
     291           6 : ulong        fd_wksp_gaddr_lo( fd_wksp_t const * wksp ) { return wksp->gaddr_lo; }
     292         153 : ulong        fd_wksp_gaddr_hi( fd_wksp_t const * wksp ) { return wksp->gaddr_hi; }
     293             : 
     294             : ulong
     295           0 : fd_wksp_owner( fd_wksp_t const * wksp ) {
     296           0 :   FD_COMPILER_MFENCE();
     297           0 :   ulong owner = FD_VOLATILE_CONST( wksp->owner );
     298           0 :   FD_COMPILER_MFENCE();
     299           0 :   return owner;
     300           0 : }
     301             : 
     302             : char const *
     303          48 : fd_wksp_strerror( int err ) {
     304          48 :   switch( err ) {
     305           3 :   case FD_WKSP_SUCCESS:     return "success";
     306          12 :   case FD_WKSP_ERR_INVAL:   return "inval";
     307          30 :   case FD_WKSP_ERR_FAIL:    return "fail";
     308           3 :   case FD_WKSP_ERR_CORRUPT: return "corrupt";
     309           0 :   default: break;
     310          48 :   }
     311           0 :   return "unknown";
     312          48 : }
     313             : 
     314             : int
     315          24 : fd_wksp_verify( fd_wksp_t * wksp ) {
     316             : 
     317      402399 : # define TEST(c) do {                                                                             \
     318      402399 :     if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_WKSP_ERR_CORRUPT; } \
     319      402399 :   } while(0)
     320             : 
     321             :   /* Validate metadata */
     322             : 
     323          24 :   TEST( wksp );
     324          24 :   TEST( wksp->magic==FD_WKSP_MAGIC );
     325             : 
     326          24 :   ulong part_max = wksp->part_max;
     327          24 :   ulong data_max = wksp->data_max;
     328          24 :   TEST( fd_wksp_footprint( part_max, data_max ) );
     329             : 
     330          24 :   ulong gaddr_lo = wksp->gaddr_lo; TEST( gaddr_lo==fd_wksp_private_data_off( part_max ) );
     331          24 :   ulong gaddr_hi = wksp->gaddr_hi; TEST( gaddr_hi==gaddr_lo+data_max                    );
     332             : 
     333          24 :   TEST( fd_shmem_name_len( wksp->name ) );
     334             : 
     335             :   /* seed is arbitrary */
     336             : 
     337          24 :   TEST( wksp->cycle_tag >= 4UL );
     338             : 
     339             :   /* TODO: consider verifying owner */
     340             : 
     341          24 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     342             : 
     343             :   /* Clear out cycle tags */
     344             : 
     345      197952 :   for( ulong i=0UL; i<part_max; i++ ) pinfo[ i ].cycle_tag = 0UL;
     346             : 
     347             :   /* Verify the idle stack */
     348             : 
     349          24 :   ulong idle_cnt = 0UL;
     350             : 
     351          24 :   do {
     352          24 :     ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
     353      197421 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     354             : 
     355             :       /* Visit i.  Note that i has not been validated yet. */
     356             : 
     357      197397 :       TEST( i<part_max            ); /* Validate i */
     358      197397 :       TEST( !pinfo[ i ].cycle_tag ); /* Make sure not visited before */
     359      197397 :       pinfo[ i ].cycle_tag = 1UL;    /* Mark as visited in idle stack */
     360      197397 :       idle_cnt++;                    /* Update the idle cnt */
     361             : 
     362             :       /* Advance to the next idle */
     363             : 
     364      197397 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx );
     365      197397 :     }
     366          24 :   } while(0);
     367             : 
     368             :   /* Idle stack looks intact, verify partitioning */
     369             : 
     370          24 :   ulong free_cnt = 0UL;
     371          24 :   ulong used_cnt = 0UL;
     372             : 
     373          24 :   do {
     374          24 :     ulong j = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     375          24 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_head_cidx );
     376          24 :     ulong g = gaddr_lo;
     377             : 
     378          24 :     int last_free = 0;
     379             : 
     380         555 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     381             : 
     382             :       /* At this point, we last visited j.  Visit i.  Note that j has
     383             :          been validated but i has not. */
     384             : 
     385         531 :       TEST( i<part_max                                           ); /* Validate i */
     386         531 :       TEST( pinfo[ i ].gaddr_lo==g                               ); /* Make sure partition is tightly adjacent to previous */
     387         531 :       TEST( pinfo[ i ].gaddr_hi> g                               ); /* Make sure partition size is non-zero */
     388         531 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].prev_cidx )==j ); /* Make sure correct prev partition */
     389         531 :       TEST( !pinfo[ i ].cycle_tag                                ); /* Make sure not visited before */
     390         531 :       pinfo[ i ].cycle_tag = 2UL;                                   /* Mark as visited in partitioning */
     391             : 
     392         531 :       g = pinfo[ i ].gaddr_hi;                                      /* Extract where the next partition should start */
     393         531 :       int is_free = !pinfo[ i ].tag;                                /* Determine if this partition is free or used */
     394         531 :       TEST( !(last_free & is_free) );                               /* Make sure no adjacent free partitions */
     395         531 :       free_cnt += (ulong) is_free;                                  /* Update the free cnt */
     396         531 :       used_cnt += (ulong)!is_free;                                  /* Update the used cnt */
     397             : 
     398             :       /* Advance to the next partition */
     399             : 
     400         531 :       last_free = is_free;
     401             : 
     402         531 :       j = i;
     403         531 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].next_cidx );
     404         531 :     }
     405             : 
     406          24 :     TEST( fd_wksp_private_pinfo_idx( wksp->part_tail_cidx )==j ); /* Make sure correct partition tail */
     407          24 :     TEST( g==gaddr_hi );                                          /* Make sure complete partitioning */
     408          24 :     TEST( (idle_cnt + free_cnt + used_cnt)==part_max );           /* Make sure no lost idle partitions */
     409          24 :   } while(0);
     410             : 
     411             :   /* Idle stack and partitioning look intact, validate used treap */
     412             : 
     413          24 :   do {
     414          24 :     ulong visit_cnt = 0UL;
     415             : 
     416          24 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     417          24 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     418          24 :     ulong g = gaddr_lo;
     419             : 
     420          24 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     421          12 :       TEST( i<part_max );                                             /* Validate i */
     422          12 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     423          12 :     }
     424             : 
     425         630 :     for(;;) {
     426             : 
     427             :       /* At this point i is and everything on stack is validated */
     428             : 
     429         630 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     430         327 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     431             : 
     432             :         /* Pop stack */
     433             : 
     434         303 :         i = s;
     435         303 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     436             : 
     437             :         /* Visit i */
     438             : 
     439         303 :         ulong p = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     440             : 
     441         303 :         TEST( pinfo[ i ].gaddr_lo>=g    );            /* Make sure this starts after last visited */
     442         303 :         TEST( pinfo[ i ].tag            );            /* Make sure tagged as a used partition */
     443         303 :         TEST( pinfo[ i ].cycle_tag==2UL );            /* Make sure in partitioning and not visited yet this traversal */
     444         303 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) /* Make sure heap property satisfied */
     445         291 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     446             : 
     447         303 :         TEST( !pinfo[ i ].in_same );                  /* Make sure unique */
     448         303 :         TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx ) ) ); /* " */
     449             : 
     450         303 :         pinfo[ i ].cycle_tag = 3UL;                   /* Mark as visited this traversal */
     451         303 :         visit_cnt++;                                  /* Update the visit cnt */
     452         303 :         g = pinfo[ i ].gaddr_hi;                      /* Get minimum start for next partition */
     453             : 
     454             :         /* Traverse the right subtree */
     455             : 
     456         303 :         p = i;
     457         303 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     458         303 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     459         150 :           TEST( i<part_max );                                             /* Validate i */
     460         150 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     461         150 :         }
     462             : 
     463         303 :       } else {
     464             : 
     465             :         /* At this point i and everything on the stack is validated.
     466             :            Push i to the stack and recurse on the left subtree. */
     467             : 
     468         303 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     469         303 :         s = i;
     470         303 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     471         303 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     472         141 :           TEST( i<part_max );                                             /* Validate i */
     473         141 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     474         141 :         }
     475             : 
     476         303 :       }
     477         630 :     }
     478             : 
     479          24 :     TEST( visit_cnt==used_cnt ); /* Make sure all used partitions in used treap */
     480          24 :   } while(0);
     481             : 
     482             :   /* Idle stack, partitioning and used treap look intact, validate the
     483             :      free treap. */
     484             : 
     485          24 :   do {
     486          24 :     ulong visit_cnt = 0UL;
     487             : 
     488          24 :     ulong i  = fd_wksp_private_pinfo_idx( wksp->part_free_cidx );
     489          24 :     ulong s  = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     490          24 :     ulong sz = 0UL;
     491             : 
     492          24 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     493          24 :       TEST( i<part_max );                                             /* Validate i */
     494          24 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     495          24 :     }
     496             : 
     497         318 :     for(;;) {
     498             : 
     499             :       /* At this point i and everything on the stack is validated */
     500             : 
     501         318 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     502         171 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     503             : 
     504             :         /* Pop stack */
     505             : 
     506         147 :         i = s;
     507         147 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     508             : 
     509             :         /* Visit i */
     510             : 
     511         147 :         ulong p   = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     512         147 :         ulong isz = fd_wksp_private_pinfo_sz( pinfo + i );               /* Extract the size */
     513             : 
     514         147 :         TEST( isz>sz              ); /* Make sure this partition i larger than previous */
     515         147 :         TEST( !pinfo[ i ].tag     ); /* Make sure tagged as a free partition */
     516         147 :         TEST( !pinfo[ i ].in_same ); /* Make sure marked as not in same */
     517             : 
     518         147 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) { /* Make sure heap property satisfied */
     519         123 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     520         123 :         }
     521             : 
     522         147 :         sz = isz; /* Update largest size partition seen so far */
     523             : 
     524             :         /* Traverse all same sized partitions */
     525             : 
     526         147 :         ulong j = i;
     527         228 :         for(;;) {
     528             : 
     529             :           /* At this point, j is validated */
     530             : 
     531         228 :           TEST( pinfo[ j ].cycle_tag==2UL ); /* Make sure in partitioning and not visited yet this traversal */
     532         228 :           pinfo[ j ].cycle_tag = 3UL;        /* Mark as visited this traversal */
     533         228 :           visit_cnt++;
     534             : 
     535         228 :           ulong k = fd_wksp_private_pinfo_idx( pinfo[ j ].same_cidx );    /* Get the next same sized */
     536         228 :           if( fd_wksp_private_pinfo_idx_is_null( k ) ) break;             /* If no more, we are done with this node */
     537          81 :           TEST( k<part_max );                                             /* Make sure valid index */
     538          81 :           TEST( fd_wksp_private_pinfo_sz( pinfo + k )==sz );              /* Make sure same size */
     539          81 :           TEST( pinfo[ k ].in_same );                                     /* Make sure marked as in same */
     540          81 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].left_cidx  ) ) );
     541          81 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].right_cidx ) ) );
     542          81 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ k ].parent_cidx )==j ); /* Make sure correct parent */
     543          81 :           j = k;
     544          81 :         }
     545             : 
     546             :         /* Recurse on the right subtree */
     547             : 
     548         147 :         p = i;
     549         147 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     550         147 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     551          60 :           TEST( i<part_max );                                             /* Validate i */
     552          60 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     553          60 :         }
     554             : 
     555         147 :       } else {
     556             : 
     557         147 :         TEST( i<part_max ); /* Validate i */
     558             : 
     559             :         /* At this point i and everything on the stack is validated.
     560             :            Push i to the stack and recurse on the left subtree. */
     561             : 
     562         147 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     563         147 :         s = i;
     564         147 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     565         147 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     566          63 :           TEST( i<part_max );                                             /* Validate i */
     567          63 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     568          63 :         }
     569         147 :       }
     570         318 :     }
     571             : 
     572          24 :     TEST( visit_cnt==free_cnt ); /* Make sure all free partitions in free treap */
     573             : 
     574          24 :   } while(0);
     575             : 
     576          24 : # undef TEST
     577             : 
     578          24 :   return FD_WKSP_SUCCESS;
     579          24 : }
     580             : 
     581             : int
     582             : fd_wksp_rebuild( fd_wksp_t * wksp,
     583         480 :                  uint        seed ) {
     584             : 
     585             :   /* Load the wksp metadata, don't rebuild if any of it looks even
     586             :      slightly off. */
     587             : 
     588         480 :   if( FD_UNLIKELY( !wksp ) ) {
     589           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     590           0 :     return FD_WKSP_ERR_CORRUPT;
     591           0 :   }
     592             : 
     593         480 :   ulong magic     = wksp->magic;
     594         480 :   ulong part_max  = wksp->part_max;
     595         480 :   ulong data_max  = wksp->data_max;
     596         480 :   ulong gaddr_lo  = wksp->gaddr_lo;
     597         480 :   ulong gaddr_hi  = wksp->gaddr_hi;
     598         480 :   ulong cycle_tag = wksp->cycle_tag;
     599             : 
     600             :   /* TODO: consider verifying owner */
     601             : 
     602         480 :   ulong footprint    = fd_wksp_footprint( part_max, data_max );
     603         480 :   ulong gaddr_lo_exp = fd_wksp_private_data_off( part_max );
     604         480 :   ulong gaddr_hi_exp = gaddr_lo_exp + data_max;
     605         480 :   if( FD_UNLIKELY( (magic!=FD_WKSP_MAGIC) | (!footprint) | (!fd_shmem_name_len( wksp->name )) |
     606         480 :                    (gaddr_lo!=gaddr_lo_exp) | (gaddr_hi!=gaddr_hi_exp) | (cycle_tag<4UL) ) ) {
     607           0 :     FD_LOG_WARNING(( "bad metadata\n\t"
     608           0 :                      "magic     %016lx (exp %016lx)\n\t"
     609           0 :                      "part_max  %lu data_max %lu (footprint %lu)\n\t"
     610           0 :                      "gaddr_lo  %lu (exp %lu)\n\t"
     611           0 :                      "gaddr_hi  %lu (exp %lu)\n\t"
     612           0 :                      "cycle_tag %lu (exp>=4)",
     613           0 :                      magic, FD_WKSP_MAGIC, part_max, data_max, footprint,
     614           0 :                      gaddr_lo, gaddr_lo_exp, gaddr_hi, gaddr_hi_exp, cycle_tag ));
     615           0 :     return FD_WKSP_ERR_CORRUPT;
     616           0 :   }
     617             : 
     618             :   /* Scan the wksp pinfo and insert any used partitions into the used
     619             :      treap and put the rest on the idle stack.  If there is any sign of
     620             :      corruption (empty, bad range or overlap between used partitions),
     621             :      we abort the rebuild (this is almost certainly data corruption of
     622             :      some form and we don't have enough info to resolve a conflict
     623             :      without potentially making the situation worse).  We do the scan in
     624             :      reverse order to rebuild the idle stack in forward order.
     625             : 
     626             :      Note that we don't ever change the gaddr_lo,gaddr_hi of any tagged
     627             :      partitions such that operation is guaranteed to never change the
     628             :      single source of truth.  As such, this operation can be interrupted
     629             :      and restarted arbitrarily safely.*/
     630             : 
     631         480 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     632             : 
     633         480 :   do {
     634         480 :     wksp->seed           = seed;
     635         480 :     wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush idle stack */
     636         480 :     wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush used treap */
     637         480 :     wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush free treap */
     638             : 
     639         480 :     ulong i = part_max;
     640    12770673 :     while( i ) {
     641    12770193 :       i--;
     642             : 
     643             :       /* Ideally, heap priorities should just be a shuffling of the
     644             :          integers [0,part_max).  fd_uint_hash will generate such a
     645             :          shuffling for part_max = 2^32.  Using the lower 30 bits
     646             :          (reserving bit 31 for bulk operations) will yield something
     647             :          very close.  We use seed to mix it up some more. */
     648             : 
     649    12770193 :       pinfo[ i ].in_same    = 0U;
     650    12770193 :       pinfo[ i ].heap_prio  = fd_uint_hash( seed ^ (uint)i ) & ((1U<<30)-1U);
     651    12770193 :       pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     652    12770193 :       pinfo[ i ].cycle_tag  = 0U;
     653             : 
     654    12770193 :       ulong tag = pinfo[ i ].tag;
     655    12770193 :       if( !tag ) { /* Not used ... make it available for reuse below */
     656    12767994 :         fd_wksp_private_idle_stack_push( i, wksp, pinfo );
     657    12767994 :         continue;
     658    12767994 :       }
     659             : 
     660        2199 :       pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     661        2199 :       pinfo[ i ].next_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     662             : 
     663        2199 :       if( FD_UNLIKELY( fd_wksp_private_used_treap_insert( i, wksp, pinfo ) ) ) return FD_WKSP_ERR_CORRUPT; /* Logs details */
     664        2199 :     }
     665         480 :   } while(0);
     666             : 
     667             :   /* At this point, a partition is either in the idle stack or used
     668             :      treap.  Further, we have:
     669             : 
     670             :                  | used                       | idle
     671             :        ----------+----------------------------+--------
     672             :        gaddr_*   | non-empty range            | 0
     673             :                  | no overlap with other used | 0
     674             :        tag       | non-zero                   | 0
     675             :        in_same   | 0                          | 0
     676             :        heap_prio | randomized                 | randomized
     677             :        prev      | NULL                       | NULL
     678             :        next      | NULL                       | NULL
     679             :        left      | used treap managed         | NULL
     680             :        right     | used treap managed         | NULL
     681             :        same      | used treap managed (NULL)  | NULL
     682             :        parent    | used treap managed         | idle stack managed
     683             :        stack     | wksp managed               | wksp managed
     684             :        cycle_tag | wksp managed               | wksp managed
     685             : 
     686             :      In-order traverse the used treap to rebuild the partitioning and
     687             :      the free treap. */
     688             : 
     689         480 :   do {
     690         480 :     uint * j_next_cidx_ptr = &wksp->part_head_cidx;  /* Location of most recently added partition next link */
     691             : 
     692         480 :     ulong  j  = FD_WKSP_PRIVATE_PINFO_IDX_NULL; /* Most recently added partition */
     693         480 :     ulong  g0 = gaddr_lo;                       /* Most recently added partition end */
     694             : 
     695         480 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     696         480 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     697        4878 :     for(;;) {
     698        4878 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     699        2679 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     700             : 
     701             :         /* Pop traversal stack */
     702             : 
     703        2199 :         i = s;
     704        2199 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     705             : 
     706             :         /* Visit i */
     707             : 
     708        2199 :         ulong g1 = pinfo[ i ].gaddr_lo;
     709        2199 :         if( g1 > g0 ) { /* There's a gap between i and the most recently added partition */
     710             : 
     711             :           /* Acquire an idle partition to hold the gap */
     712             : 
     713        1506 :           if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     714           0 :             FD_LOG_WARNING(( "part_max (%lu) too small to fill gap before partition %lu (tag %lu gaddr_lo %lu gaddr_hi %lu)",
     715           0 :                              part_max, i, pinfo[i].tag, pinfo[i].gaddr_lo, pinfo[i].gaddr_hi ));
     716           0 :             return FD_WKSP_ERR_CORRUPT;
     717           0 :           }
     718        1506 :           ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     719             : 
     720             :           /* Populate the acquired partition with the gap details,
     721             :              append it to the wksp partitioning and insert it into the
     722             :              free treap.  Note that stack_push/pop reset gaddr_lo,
     723             :              gaddr_hi, tag, in_same, {prev, next, left, right, same,
     724             :              parent}_cidx.  It preserved heap_prio from its original
     725             :              assignment and didn't touch stack_cidx or cycle_tag. */
     726             : 
     727        1506 :           pinfo[ k ].gaddr_lo  = g0;
     728        1506 :           pinfo[ k ].gaddr_hi  = g1;
     729        1506 :           pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     730        1506 :           *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     731        1506 :           j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     732        1506 :           j  = k;
     733        1506 :           g0 = g1;
     734             : 
     735        1506 :           fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     736        1506 :         }
     737             : 
     738             :         /* Add i to the partitioning. */
     739             : 
     740        2199 :         pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     741        2199 :         *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( i );
     742        2199 :         j_next_cidx_ptr  = &pinfo[ i ].next_cidx;
     743        2199 :         j  = i;
     744        2199 :         g0 = pinfo[ i ].gaddr_hi;
     745             : 
     746             :         /* Traverse the right subtree */
     747             : 
     748        2199 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     749             : 
     750        2199 :       } else {
     751             : 
     752             :         /* Push i to the stack and recurse on the left subtree. */
     753             : 
     754        2199 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     755        2199 :         s = i;
     756        2199 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     757             : 
     758        2199 :       }
     759        4878 :     }
     760             : 
     761         480 :     if( g0 < gaddr_hi ) { /* Have final gap to fill */
     762             : 
     763             :       /* This works the same as the above */
     764             : 
     765         480 :       if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     766           0 :         FD_LOG_WARNING(( "part_max (%lu) too small to complete partitioning", part_max ));
     767           0 :         return FD_WKSP_ERR_CORRUPT;
     768           0 :       }
     769         480 :       ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     770             : 
     771         480 :       pinfo[ k ].gaddr_lo  = g0;
     772         480 :       pinfo[ k ].gaddr_hi  = gaddr_hi;
     773         480 :       pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     774         480 :       *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     775         480 :       j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     776         480 :       j  = k;
     777             :     //g0 = gaddr_hi;
     778             : 
     779         480 :       fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     780         480 :     }
     781             : 
     782         480 :     wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( j );
     783             : 
     784         480 :   } while(0);
     785             : 
     786         480 :   return FD_WKSP_SUCCESS;
     787         480 : }

Generated by: LCOV version 1.14