LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_admin.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 377 433 87.1 %
Date: 2025-01-08 12:08:44 Functions: 16 18 88.9 %

          Line data    Source code
       1             : #include "fd_wksp_private.h"
       2             : #include <sys/mman.h>
       3             : #include <errno.h>
       4             : 
       5             : int
       6   587976717 : fd_wksp_private_lock( fd_wksp_t * wksp ) {
       7             : # if FD_WKSP_LOCK_RECLAIM
       8             :   int   warning = 0;
       9             : #endif
      10   587976717 :   ulong me      = fd_log_group_id();
      11             : 
      12   587976717 :   ulong * _owner = &wksp->owner;
      13   587976717 :   for(;;) {
      14             : 
      15             :     /* Note that we emulate CAS on platforms without FD_HAS_ATOMIC
      16             :        to minimize the amount of code differences we have to test.  On
      17             :        platforms without FD_HAS_ATOMIC, a workspace should not be used
      18             :        concurrently though. */
      19             : 
      20   587976717 :     FD_COMPILER_MFENCE();
      21   587976717 : #   if FD_HAS_ATOMIC
      22   587976717 :     ulong pid = FD_ATOMIC_CAS( _owner, ULONG_MAX, me );
      23             : #   else
      24             :     ulong pid = FD_VOLATILE_CONST( *_owner );
      25             :     if( pid==ULONG_MAX ) FD_VOLATILE( *_owner ) = me;
      26             : #   endif
      27   587976717 :     FD_COMPILER_MFENCE();
      28             : 
      29   587976717 :     if( FD_LIKELY( pid==ULONG_MAX ) ) return FD_WKSP_SUCCESS;
      30             : 
      31             : # if FD_WKSP_LOCK_RECLAIM
      32             :     int status = fd_log_group_id_query( pid );
      33             :     if( FD_UNLIKELY( status==FD_LOG_GROUP_ID_QUERY_DEAD ) ) { /* A process died while holding the lock, try to recover the lock */
      34             : 
      35             :       FD_COMPILER_MFENCE();
      36             : #     if FD_HAS_ATOMIC
      37             :       ulong cur = FD_ATOMIC_CAS( _owner, pid, me );
      38             : #     else
      39             :       ulong cur = FD_VOLATILE_CONST( *_owner );
      40             :       if( cur==pid ) FD_VOLATILE( *_owner ) = me;
      41             : #     endif
      42             :       FD_COMPILER_MFENCE();
      43             : 
      44             :       if( FD_LIKELY( cur==pid ) ) { /* We recovered the lock from the dead pid, try to fix up incomplete ops */
      45             : 
      46             :         FD_LOG_WARNING(( "Process %lu died in an operation on wksp %s; verifying", pid, wksp->name ));
      47             :         if( FD_LIKELY( !fd_wksp_verify( wksp ) ) ) { /* logs details of issues detected */
      48             :           FD_LOG_NOTICE(( "wksp verified" ));
      49             :           return FD_WKSP_SUCCESS;
      50             :         }
      51             : 
      52             :         FD_LOG_WARNING(( "Issues detected; rebuilding" ));
      53             :         if( FD_UNLIKELY( fd_wksp_rebuild( wksp, wksp->seed ) ) ) { /* Rebuild failed (logs details of issues detected) */
      54             :           /* Return control of the lock to the previous owner */
      55             :           FD_COMPILER_MFENCE();
      56             :           FD_VOLATILE( *_owner ) = pid;
      57             :           FD_COMPILER_MFENCE();
      58             :           FD_LOG_WARNING(( "corrupt wksp detected" ));
      59             :           return FD_WKSP_ERR_CORRUPT;
      60             :         }
      61             : 
      62             :         FD_LOG_NOTICE(( "wksp rebuilt" ));
      63             :         return FD_WKSP_SUCCESS;
      64             : 
      65             :       }
      66             : 
      67             :       /* Somebody beat us to recovering the lock ... try again */
      68             : 
      69             :     } else if( FD_UNLIKELY( status!=FD_LOG_GROUP_ID_QUERY_LIVE ) ) { /* Unclear pid status ... issue a warning and try again */
      70             : 
      71             :       if( FD_UNLIKELY( !warning ) ) {
      72             :         FD_LOG_WARNING(( "wksp %s is owned by unknown pid %li; attempting to recover", wksp->name, pid ));
      73             :         warning = 1;
      74             :       }
      75             : 
      76             :     }
      77             : 
      78             :     /* At this point, either another thread in this process has the
      79             :        lock, another active thread in another process has the lock,
      80             :        another unknown status thread in other process has the lock or
      81             :        another thread beat us to reclaim the lock from a dead process.
      82             :        In any case, we don't have the lock.  Wait a while to limit O/S
      83             :        contention and try again. */
      84             : 
      85             :     FD_YIELD();
      86             : # else
      87             : 
      88             :     /* If we are running without FD_WKSP_LOCK_RECLAIM then it is assumed
      89             :        that the contention is caused by a tile pinned to another core,
      90             :        and that this core is itself pinned so spin locking is best. */
      91           0 :     FD_SPIN_PAUSE();
      92             : 
      93           0 : #endif
      94           0 :   }
      95             : 
      96             :   /* never get here */
      97   587976717 : }
      98             : 
      99             : /* Public APIs ********************************************************/
     100             : 
     101             : ulong
     102             : fd_wksp_part_max_est( ulong footprint,
     103         558 :                       ulong sz_typical ) {
     104         558 :   footprint       = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     105         558 :   ulong data_end  = footprint - 1UL;
     106         558 :   ulong pinfo_off = fd_wksp_private_pinfo_off();
     107         558 :   ulong consumed  = sizeof(fd_wksp_private_pinfo_t) + sz_typical;
     108         558 :   ulong part_max  = (data_end - pinfo_off) / (consumed + (ulong)!consumed); /* avoid div-by-zero */
     109         558 :   if( FD_UNLIKELY( (!footprint) | (!sz_typical) | (sz_typical>consumed) | (pinfo_off>data_end) ) ) return 0UL;
     110         549 :   return fd_ulong_min( part_max, FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     111         558 : }
     112             : 
     113             : ulong
     114             : fd_wksp_data_max_est( ulong footprint,
     115         567 :                       ulong part_max ) {
     116         567 :   footprint = fd_ulong_align_dn( footprint, FD_WKSP_ALIGN );
     117         567 :   ulong data_end  = footprint - 1UL;
     118         567 :   ulong data_off  = fd_wksp_private_data_off( part_max );
     119         567 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) |
     120         567 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* covered above */
     121         567 :                    (!footprint) | (data_off>=data_end) ) ) return 0UL;
     122         549 :   return data_end - data_off;
     123         567 : }
     124             : 
     125             : ulong
     126           3 : fd_wksp_align( void ) {
     127           3 :   return FD_WKSP_ALIGN;
     128           3 : }
     129             : 
     130             : ulong
     131             : fd_wksp_footprint( ulong part_max,
     132        2850 :                    ulong data_max ) {
     133        2850 :   ulong data_off = fd_wksp_private_data_off( part_max );
     134        2850 :   if( FD_UNLIKELY( (!part_max) | (part_max>FD_WKSP_PRIVATE_PINFO_IDX_NULL) | (!data_max) |
     135        2850 :                    (part_max > ((ULONG_MAX - fd_wksp_private_pinfo_off())/sizeof(fd_wksp_private_pinfo_t))) | /* Covered above */
     136        2850 :                    (data_max > (ULONG_MAX - FD_WKSP_ALIGN + 1UL - data_off - 1UL)                         ) ) ) return 0UL;
     137        2781 :   return fd_ulong_align_up( data_off + data_max + 1UL, FD_WKSP_ALIGN );
     138        2850 : }
     139             : 
     140             : void *
     141             : fd_wksp_new( void *       shmem,
     142             :              char const * name,
     143             :              uint         seed,
     144             :              ulong        part_max,
     145         552 :              ulong        data_max ) {
     146         552 :   fd_wksp_t * wksp = (fd_wksp_t *)shmem;
     147             : 
     148         552 :   if( FD_UNLIKELY( !wksp ) ) {
     149           3 :     FD_LOG_WARNING(( "NULL shmem" ));
     150           3 :     return NULL;
     151           3 :   }
     152             : 
     153         549 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     154           3 :     FD_LOG_WARNING(( "bad align" ));
     155           3 :     return NULL;
     156           3 :   }
     157             : 
     158         546 :   ulong name_len = fd_shmem_name_len( name );
     159         546 :   if( FD_UNLIKELY( !name_len ) ) {
     160           3 :     FD_LOG_WARNING(( "bad name" ));
     161           3 :     return NULL;
     162           3 :   }
     163             : 
     164         543 :   ulong footprint = fd_wksp_footprint( part_max, data_max );
     165         543 :   if( FD_UNLIKELY( !footprint ) ) {
     166          12 :     FD_LOG_WARNING(( "bad part_max and/or data_max" ));
     167          12 :     return NULL;
     168          12 :   }
     169             : 
     170         531 :   fd_memset( wksp, 0, fd_wksp_footprint( part_max, 1UL ) );
     171             : 
     172         531 :   wksp->part_max       = part_max;
     173         531 :   wksp->data_max       = data_max;
     174         531 :   wksp->gaddr_lo       = fd_wksp_private_data_off( part_max );
     175         531 :   wksp->gaddr_hi       = wksp->gaddr_lo + data_max;
     176         531 :   fd_memcpy( wksp->name, name, name_len+1UL );
     177         531 :   wksp->seed           = seed;
     178         531 :   wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     179         531 :   wksp->part_head_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     180         531 :   wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     181         531 :   wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     182         531 :   wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     183         531 :   wksp->cycle_tag      = 4UL;  /* Verify uses tags 0-3 */
     184         531 :   wksp->owner          = 0UL;  /* Mark as locked and in construction */
     185             : 
     186             :   /* Note that wksp->owner was set to zero above, "locking" the wksp by
     187             :      group_id 0.  And the memset above set all the partition tags to
     188             :      zero such that there are no allocated partitions.  So once we set
     189             :      magic below, we can finish the initialization by rebuilding and
     190             :      unlocking.  Since fd_log_group_id is non-zero, the zero owner
     191             :      indicates to any remote observer of the shared memory region that
     192             :      the wksp is being built for the first time. */
     193             : 
     194         531 :   FD_COMPILER_MFENCE();
     195         531 :   FD_VOLATILE( wksp->magic ) = FD_WKSP_MAGIC;
     196         531 :   FD_COMPILER_MFENCE();
     197             : 
     198         531 :   int err = fd_wksp_rebuild( wksp, seed );
     199         531 :   if( FD_UNLIKELY( err ) ) { /* Should be impossible at this point */
     200             : 
     201           0 :     FD_COMPILER_MFENCE();
     202           0 :     FD_VOLATILE( wksp->magic ) = 0UL;
     203           0 :     FD_COMPILER_MFENCE();
     204             : 
     205           0 :     FD_LOG_WARNING(( "fd_wksp_rebuild failed (%i-%s)", err, fd_wksp_strerror( err ) ));
     206           0 :     return NULL;
     207           0 :   }
     208             : 
     209             :   #if FD_HAS_DEEPASAN
     210             :   /* Poison entire workspace except wksp header and the pinfo array. */
     211             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     212             :   fd_asan_poison( wksp_data, footprint - fd_wksp_private_pinfo_off() );
     213             :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     214             :   for( ulong i=0; i<part_max; i++ ) {
     215             :     fd_asan_unpoison( &pinfo[ i ], FD_WKSP_PRIVATE_PINFO_FOOTPRINT );
     216             :   }
     217             :   #endif
     218             : 
     219         531 :   fd_wksp_private_unlock( wksp );
     220             : 
     221         531 :   return wksp;
     222         531 : }
     223             : 
     224             : fd_wksp_t *
     225        2346 : fd_wksp_join( void * shwksp ) {
     226        2346 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     227             : 
     228        2346 :   if( FD_UNLIKELY( !wksp ) ) {
     229           3 :     FD_LOG_WARNING(( "NULL shwksp" ));
     230           3 :     return NULL;
     231           3 :   }
     232             : 
     233        2343 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     234           3 :     FD_LOG_WARNING(( "bad align" ));
     235           3 :     return NULL;
     236           3 :   }
     237             : 
     238        2340 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     239           3 :     FD_LOG_WARNING(( "bad magic" ));
     240           3 :     return NULL;
     241           3 :   }
     242             : 
     243        2337 :   return wksp;
     244        2340 : }
     245             : 
     246             : void *
     247        2241 : fd_wksp_leave( fd_wksp_t * wksp ) {
     248        2241 :   if( FD_UNLIKELY( !wksp ) ) {
     249           3 :     FD_LOG_WARNING(( "NULL wksp" ));
     250           3 :     return NULL;
     251           3 :   }
     252             : 
     253        2238 :   return (void *)wksp;
     254        2241 : }
     255             : 
     256             : void *
     257         531 : fd_wksp_delete( void * shwksp ) {
     258         531 :   fd_wksp_t * wksp = (fd_wksp_t *)shwksp;
     259             : 
     260         531 :   if( FD_UNLIKELY( !wksp ) ) {
     261           3 :     FD_LOG_WARNING(( "NULL shwksp" ));
     262           3 :     return NULL;
     263           3 :   }
     264             : 
     265         528 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     266           3 :     FD_LOG_WARNING(( "bad align" ));
     267           3 :     return NULL;
     268           3 :   }
     269             : 
     270         525 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     271           3 :     FD_LOG_WARNING(( "bad magic" ));
     272           3 :     return NULL;
     273           3 :   }
     274             : 
     275             :   /* TODO: consider testing owner */
     276             : 
     277         522 :   FD_COMPILER_MFENCE();
     278         522 :   FD_VOLATILE( wksp->magic ) = 0UL;
     279         522 :   FD_COMPILER_MFENCE();
     280             : 
     281             : # if FD_HAS_DEEPASAN
     282             :   /* Unpoison entire wksp region. */
     283             :   ulong footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     284             :   void * wksp_data = (void*)((ulong)wksp + fd_wksp_private_pinfo_off());
     285             :   fd_asan_unpoison( wksp_data, footprint - fd_wksp_private_pinfo_off());
     286             : # endif
     287             : 
     288         522 :   return wksp;
     289         525 : }
     290             : 
     291           3 : char const * fd_wksp_name    ( fd_wksp_t const * wksp ) { return wksp->name;     }
     292         111 : uint         fd_wksp_seed    ( fd_wksp_t const * wksp ) { return wksp->seed;     }
     293           3 : ulong        fd_wksp_part_max( fd_wksp_t const * wksp ) { return wksp->part_max; }
     294           3 : ulong        fd_wksp_data_max( fd_wksp_t const * wksp ) { return wksp->data_max; }
     295             : 
     296             : ulong
     297           0 : fd_wksp_owner( fd_wksp_t const * wksp ) {
     298           0 :   FD_COMPILER_MFENCE();
     299           0 :   ulong owner = FD_VOLATILE_CONST( wksp->owner );
     300           0 :   FD_COMPILER_MFENCE();
     301           0 :   return owner;
     302           0 : }
     303             : 
     304             : char const *
     305          48 : fd_wksp_strerror( int err ) {
     306          48 :   switch( err ) {
     307           3 :   case FD_WKSP_SUCCESS:     return "success";
     308          12 :   case FD_WKSP_ERR_INVAL:   return "inval";
     309          30 :   case FD_WKSP_ERR_FAIL:    return "fail";
     310           3 :   case FD_WKSP_ERR_CORRUPT: return "corrupt";
     311           0 :   default: break;
     312          48 :   }
     313           0 :   return "unknown";
     314          48 : }
     315             : 
     316             : int
     317          51 : fd_wksp_verify( fd_wksp_t * wksp ) {
     318             : 
     319      430491 : # define TEST(c) do {                                                                             \
     320      430491 :     if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_WKSP_ERR_CORRUPT; } \
     321      430491 :   } while(0)
     322             : 
     323             :   /* Validate metadata */
     324             : 
     325          51 :   TEST( wksp );
     326          51 :   TEST( wksp->magic==FD_WKSP_MAGIC );
     327             : 
     328          51 :   ulong part_max = wksp->part_max;
     329          51 :   ulong data_max = wksp->data_max;
     330          51 :   TEST( fd_wksp_footprint( part_max, data_max ) );
     331             : 
     332          51 :   ulong gaddr_lo = wksp->gaddr_lo; TEST( gaddr_lo==fd_wksp_private_data_off( part_max ) );
     333          51 :   ulong gaddr_hi = wksp->gaddr_hi; TEST( gaddr_hi==gaddr_lo+data_max                    );
     334             : 
     335          51 :   TEST( fd_shmem_name_len( wksp->name ) );
     336             : 
     337             :   /* seed is arbitrary */
     338             : 
     339          51 :   TEST( wksp->cycle_tag >= 4UL );
     340             : 
     341             :   /* TODO: consider verifying owner */
     342             : 
     343          51 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     344             : 
     345             :   /* Clear out cycle tags */
     346             : 
     347      201381 :   for( ulong i=0UL; i<part_max; i++ ) pinfo[ i ].cycle_tag = 0UL;
     348             : 
     349             :   /* Verify the idle stack */
     350             : 
     351          51 :   ulong idle_cnt = 0UL;
     352             : 
     353          51 :   do {
     354          51 :     ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
     355      199071 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     356             : 
     357             :       /* Visit i.  Note that i has not been validated yet. */
     358             : 
     359      199020 :       TEST( i<part_max            ); /* Validate i */
     360      199020 :       TEST( !pinfo[ i ].cycle_tag ); /* Make sure not visited before */
     361      199020 :       pinfo[ i ].cycle_tag = 1UL;    /* Mark as visited in idle stack */
     362      199020 :       idle_cnt++;                    /* Update the idle cnt */
     363             : 
     364             :       /* Advance to the next idle */
     365             : 
     366      199020 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx );
     367      199020 :     }
     368          51 :   } while(0);
     369             : 
     370             :   /* Idle stack looks intact, verify partitioning */
     371             : 
     372          51 :   ulong free_cnt = 0UL;
     373          51 :   ulong used_cnt = 0UL;
     374             : 
     375          51 :   do {
     376          51 :     ulong j = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     377          51 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_head_cidx );
     378          51 :     ulong g = gaddr_lo;
     379             : 
     380          51 :     int last_free = 0;
     381             : 
     382        2361 :     while( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     383             : 
     384             :       /* At this point, we last visited j.  Visit i.  Note that j has
     385             :          been validated but i has not. */
     386             : 
     387        2310 :       TEST( i<part_max                                           ); /* Validate i */
     388        2310 :       TEST( pinfo[ i ].gaddr_lo==g                               ); /* Make sure partition is tightly adjacent to previous */
     389        2310 :       TEST( pinfo[ i ].gaddr_hi> g                               ); /* Make sure partition size is non-zero */
     390        2310 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].prev_cidx )==j ); /* Make sure correct prev partition */
     391        2310 :       TEST( !pinfo[ i ].cycle_tag                                ); /* Make sure not visited before */
     392        2310 :       pinfo[ i ].cycle_tag = 2UL;                                   /* Mark as visited in partitioning */
     393             : 
     394        2310 :       g = pinfo[ i ].gaddr_hi;                                      /* Extract where the next partition should start */
     395        2310 :       int is_free = !pinfo[ i ].tag;                                /* Determine if this partition is free or used */
     396        2310 :       TEST( !(last_free & is_free) );                               /* Make sure no adjacent free partitions */
     397        2310 :       free_cnt += (ulong) is_free;                                  /* Update the free cnt */
     398        2310 :       used_cnt += (ulong)!is_free;                                  /* Update the used cnt */
     399             : 
     400             :       /* Advance to the next partition */
     401             : 
     402        2310 :       last_free = is_free;
     403             : 
     404        2310 :       j = i;
     405        2310 :       i = fd_wksp_private_pinfo_idx( pinfo[ i ].next_cidx );
     406        2310 :     }
     407             : 
     408          51 :     TEST( fd_wksp_private_pinfo_idx( wksp->part_tail_cidx )==j ); /* Make sure correct partition tail */
     409          51 :     TEST( g==gaddr_hi );                                          /* Make sure complete partitioning */
     410          51 :     TEST( (idle_cnt + free_cnt + used_cnt)==part_max );           /* Make sure no lost idle partitions */
     411          51 :   } while(0);
     412             : 
     413             :   /* Idle stack and partitioning look intact, validate used treap */
     414             : 
     415          51 :   do {
     416          51 :     ulong visit_cnt = 0UL;
     417             : 
     418          51 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     419          51 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     420          51 :     ulong g = gaddr_lo;
     421             : 
     422          51 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     423          39 :       TEST( i<part_max );                                             /* Validate i */
     424          39 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     425          39 :     }
     426             : 
     427        2691 :     for(;;) {
     428             : 
     429             :       /* At this point i is and everything on stack is validated */
     430             : 
     431        2691 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     432        1371 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     433             : 
     434             :         /* Pop stack */
     435             : 
     436        1320 :         i = s;
     437        1320 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     438             : 
     439             :         /* Visit i */
     440             : 
     441        1320 :         ulong p = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     442             : 
     443        1320 :         TEST( pinfo[ i ].gaddr_lo>=g    );            /* Make sure this starts after last visited */
     444        1320 :         TEST( pinfo[ i ].tag            );            /* Make sure tagged as a used partition */
     445        1320 :         TEST( pinfo[ i ].cycle_tag==2UL );            /* Make sure in partitioning and not visited yet this traversal */
     446        1320 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) /* Make sure heap property satisfied */
     447        1281 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     448             : 
     449        1320 :         TEST( !pinfo[ i ].in_same );                  /* Make sure unique */
     450        1320 :         TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx ) ) ); /* " */
     451             : 
     452        1320 :         pinfo[ i ].cycle_tag = 3UL;                   /* Mark as visited this traversal */
     453        1320 :         visit_cnt++;                                  /* Update the visit cnt */
     454        1320 :         g = pinfo[ i ].gaddr_hi;                      /* Get minimum start for next partition */
     455             : 
     456             :         /* Traverse the right subtree */
     457             : 
     458        1320 :         p = i;
     459        1320 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     460        1320 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     461         672 :           TEST( i<part_max );                                             /* Validate i */
     462         672 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     463         672 :         }
     464             : 
     465        1320 :       } else {
     466             : 
     467             :         /* At this point i and everything on the stack is validated.
     468             :            Push i to the stack and recurse on the left subtree. */
     469             : 
     470        1320 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     471        1320 :         s = i;
     472        1320 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     473        1320 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     474         609 :           TEST( i<part_max );                                             /* Validate i */
     475         609 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     476         609 :         }
     477             : 
     478        1320 :       }
     479        2691 :     }
     480             : 
     481          51 :     TEST( visit_cnt==used_cnt ); /* Make sure all used partitions in used treap */
     482          51 :   } while(0);
     483             : 
     484             :   /* Idle stack, partitioning and used treap look intact, validate the
     485             :      free treap. */
     486             : 
     487          51 :   do {
     488          51 :     ulong visit_cnt = 0UL;
     489             : 
     490          51 :     ulong i  = fd_wksp_private_pinfo_idx( wksp->part_free_cidx );
     491          51 :     ulong s  = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     492          51 :     ulong sz = 0UL;
     493             : 
     494          51 :     if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     495          51 :       TEST( i<part_max );                                             /* Validate i */
     496          51 :       TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     497          51 :     }
     498             : 
     499        1209 :     for(;;) {
     500             : 
     501             :       /* At this point i and everything on the stack is validated */
     502             : 
     503        1209 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     504         630 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     505             : 
     506             :         /* Pop stack */
     507             : 
     508         579 :         i = s;
     509         579 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     510             : 
     511             :         /* Visit i */
     512             : 
     513         579 :         ulong p   = fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx ); /* Extract the parent */
     514         579 :         ulong isz = fd_wksp_private_pinfo_sz( pinfo + i );               /* Extract the size */
     515             : 
     516         579 :         TEST( isz>sz              ); /* Make sure this partition i larger than previous */
     517         579 :         TEST( !pinfo[ i ].tag     ); /* Make sure tagged as a free partition */
     518         579 :         TEST( !pinfo[ i ].in_same ); /* Make sure marked as not in same */
     519             : 
     520         579 :         if( !fd_wksp_private_pinfo_idx_is_null( p ) ) { /* Make sure heap property satisfied */
     521         528 :           TEST( pinfo[ p ].heap_prio >= pinfo[ i ].heap_prio );
     522         528 :         }
     523             : 
     524         579 :         sz = isz; /* Update largest size partition seen so far */
     525             : 
     526             :         /* Traverse all same sized partitions */
     527             : 
     528         579 :         ulong j = i;
     529         990 :         for(;;) {
     530             : 
     531             :           /* At this point, j is validated */
     532             : 
     533         990 :           TEST( pinfo[ j ].cycle_tag==2UL ); /* Make sure in partitioning and not visited yet this traversal */
     534         990 :           pinfo[ j ].cycle_tag = 3UL;        /* Mark as visited this traversal */
     535         990 :           visit_cnt++;
     536             : 
     537         990 :           ulong k = fd_wksp_private_pinfo_idx( pinfo[ j ].same_cidx );    /* Get the next same sized */
     538         990 :           if( fd_wksp_private_pinfo_idx_is_null( k ) ) break;             /* If no more, we are done with this node */
     539         411 :           TEST( k<part_max );                                             /* Make sure valid index */
     540         411 :           TEST( fd_wksp_private_pinfo_sz( pinfo + k )==sz );              /* Make sure same size */
     541         411 :           TEST( pinfo[ k ].in_same );                                     /* Make sure marked as in same */
     542         411 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].left_cidx  ) ) );
     543         411 :           TEST( fd_wksp_private_pinfo_idx_is_null( fd_wksp_private_pinfo_idx( pinfo[ k ].right_cidx ) ) );
     544         411 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ k ].parent_cidx )==j ); /* Make sure correct parent */
     545         411 :           j = k;
     546         411 :         }
     547             : 
     548             :         /* Recurse on the right subtree */
     549             : 
     550         579 :         p = i;
     551         579 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     552         579 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     553         270 :           TEST( i<part_max );                                             /* Validate i */
     554         270 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==p ); /* Validate parent */
     555         270 :         }
     556             : 
     557         579 :       } else {
     558             : 
     559         579 :         TEST( i<part_max ); /* Validate i */
     560             : 
     561             :         /* At this point i and everything on the stack is validated.
     562             :            Push i to the stack and recurse on the left subtree. */
     563             : 
     564         579 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     565         579 :         s = i;
     566         579 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     567         579 :         if( !fd_wksp_private_pinfo_idx_is_null( i ) ) {
     568         258 :           TEST( i<part_max );                                             /* Validate i */
     569         258 :           TEST( fd_wksp_private_pinfo_idx( pinfo[ i ].parent_cidx )==s ); /* Validate parent */
     570         258 :         }
     571         579 :       }
     572        1209 :     }
     573             : 
     574          51 :     TEST( visit_cnt==free_cnt ); /* Make sure all free partitions in free treap */
     575             : 
     576          51 :   } while(0);
     577             : 
     578          51 : # undef TEST
     579             : 
     580          51 :   return FD_WKSP_SUCCESS;
     581          51 : }
     582             : 
     583             : int
     584             : fd_wksp_rebuild( fd_wksp_t * wksp,
     585        1215 :                  uint        seed ) {
     586             : 
     587             :   /* Load the wksp metadata, don't rebuild if any of it looks even
     588             :      slightly off. */
     589             : 
     590        1215 :   if( FD_UNLIKELY( !wksp ) ) {
     591           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     592           0 :     return FD_WKSP_ERR_CORRUPT;
     593           0 :   }
     594             : 
     595        1215 :   ulong magic     = wksp->magic;
     596        1215 :   ulong part_max  = wksp->part_max;
     597        1215 :   ulong data_max  = wksp->data_max;
     598        1215 :   ulong gaddr_lo  = wksp->gaddr_lo;
     599        1215 :   ulong gaddr_hi  = wksp->gaddr_hi;
     600        1215 :   ulong cycle_tag = wksp->cycle_tag;
     601             : 
     602             :   /* TODO: consider verifying owner */
     603             : 
     604        1215 :   ulong footprint    = fd_wksp_footprint( part_max, data_max );
     605        1215 :   ulong gaddr_lo_exp = fd_wksp_private_data_off( part_max );
     606        1215 :   ulong gaddr_hi_exp = gaddr_lo_exp + data_max;
     607        1215 :   if( FD_UNLIKELY( (magic!=FD_WKSP_MAGIC) | (!footprint) | (!fd_shmem_name_len( wksp->name )) |
     608        1215 :                    (gaddr_lo!=gaddr_lo_exp) | (gaddr_hi!=gaddr_hi_exp) | (cycle_tag<4UL) ) ) {
     609           0 :     FD_LOG_WARNING(( "bad metadata\n\t"
     610           0 :                      "magic     %016lx (exp %016lx)\n\t"
     611           0 :                      "part_max  %lu data_max %lu (footprint %lu)\n\t"
     612           0 :                      "gaddr_lo  %lu (exp %lu)\n\t"
     613           0 :                      "gaddr_hi  %lu (exp %lu)\n\t"
     614           0 :                      "cycle_tag %lu (exp>=4)",
     615           0 :                      magic, FD_WKSP_MAGIC, part_max, data_max, footprint,
     616           0 :                      gaddr_lo, gaddr_lo_exp, gaddr_hi, gaddr_hi_exp, cycle_tag ));
     617           0 :     return FD_WKSP_ERR_CORRUPT;
     618           0 :   }
     619             : 
     620             :   /* Scan the wksp pinfo and insert any used partitions into the used
     621             :      treap and put the rest on the idle stack.  If there is any sign of
     622             :      corruption (empty, bad range or overlap between used partitions),
     623             :      we abort the rebuild (this is almost certainly data corruption of
     624             :      some form and we don't have enough info to resolve a conflict
     625             :      without potentially making the situation worse).  We do the scan in
     626             :      reverse order to rebuild the idle stack in forward order.
     627             : 
     628             :      Note that we don't ever change the gaddr_lo,gaddr_hi of any tagged
     629             :      partitions such that operation is guaranteed to never change the
     630             :      single source of truth.  As such, this operation can be interrupted
     631             :      and restarted arbitrarily safely.*/
     632             : 
     633        1215 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
     634             : 
     635        1215 :   do {
     636        1215 :     wksp->seed           = seed;
     637        1215 :     wksp->idle_top_cidx  = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush idle stack */
     638        1215 :     wksp->part_used_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush used treap */
     639        1215 :     wksp->part_free_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL ); /* Flush free treap */
     640             : 
     641        1215 :     ulong i = part_max;
     642    35916186 :     while( i ) {
     643    35914971 :       i--;
     644             : 
     645             :       /* Ideally, heap priorities should just be a shuffling of the
     646             :          integers [0,part_max).  fd_uint_hash will generate such a
     647             :          shuffling for part_max = 2^32.  Using the lower 30 bits
     648             :          (reserving bit 31 for bulk operations) will yield something
     649             :          very close.  We use seed to mix it up some more. */
     650             : 
     651    35914971 :       pinfo[ i ].in_same    = 0U;
     652    35914971 :       pinfo[ i ].heap_prio  = fd_uint_hash( seed ^ (uint)i ) & ((1U<<30)-1U);
     653    35914971 :       pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     654    35914971 :       pinfo[ i ].cycle_tag  = 0U;
     655             : 
     656    35914971 :       ulong tag = pinfo[ i ].tag;
     657    35914971 :       if( !tag ) { /* Not used ... make it available for reuse below */
     658    35906103 :         fd_wksp_private_idle_stack_push( i, wksp, pinfo );
     659    35906103 :         continue;
     660    35906103 :       }
     661             : 
     662        8868 :       pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     663        8868 :       pinfo[ i ].next_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     664             : 
     665        8868 :       if( FD_UNLIKELY( fd_wksp_private_used_treap_insert( i, wksp, pinfo ) ) ) return FD_WKSP_ERR_CORRUPT; /* Logs details */
     666        8868 :     }
     667        1215 :   } while(0);
     668             : 
     669             :   /* At this point, a partition is either in the idle stack or used
     670             :      treap.  Further, we have:
     671             : 
     672             :                  | used                       | idle
     673             :        ----------+----------------------------+--------
     674             :        gaddr_*   | non-empty range            | 0
     675             :                  | no overlap with other used | 0
     676             :        tag       | non-zero                   | 0
     677             :        in_same   | 0                          | 0
     678             :        heap_prio | randomized                 | randomized
     679             :        prev      | NULL                       | NULL
     680             :        next      | NULL                       | NULL
     681             :        left      | used treap managed         | NULL
     682             :        right     | used treap managed         | NULL
     683             :        same      | used treap managed (NULL)  | NULL
     684             :        parent    | used treap managed         | idle stack managed
     685             :        stack     | wksp managed               | wksp managed
     686             :        cycle_tag | wksp managed               | wksp managed
     687             : 
     688             :      In-order traverse the used treap to rebuild the partitioning and
     689             :      the free treap. */
     690             : 
     691        1215 :   do {
     692        1215 :     uint * j_next_cidx_ptr = &wksp->part_head_cidx;  /* Location of most recently added partition next link */
     693             : 
     694        1215 :     ulong  j  = FD_WKSP_PRIVATE_PINFO_IDX_NULL; /* Most recently added partition */
     695        1215 :     ulong  g0 = gaddr_lo;                       /* Most recently added partition end */
     696             : 
     697        1215 :     ulong i = fd_wksp_private_pinfo_idx( wksp->part_used_cidx );
     698        1215 :     ulong s = FD_WKSP_PRIVATE_PINFO_IDX_NULL;
     699       18951 :     for(;;) {
     700       18951 :       if( fd_wksp_private_pinfo_idx_is_null( i ) ) {
     701       10083 :         if( fd_wksp_private_pinfo_idx_is_null( s ) ) break; /* Done */
     702             : 
     703             :         /* Pop traversal stack */
     704             : 
     705        8868 :         i = s;
     706        8868 :         s = fd_wksp_private_pinfo_idx( pinfo[ i ].stack_cidx );
     707             : 
     708             :         /* Visit i */
     709             : 
     710        8868 :         ulong g1 = pinfo[ i ].gaddr_lo;
     711        8868 :         if( g1 > g0 ) { /* There's a gap between i and the most recently added partition */
     712             : 
     713             :           /* Acquire an idle partition to hold the gap */
     714             : 
     715        6231 :           if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     716           0 :             FD_LOG_WARNING(( "part_max (%lu) too small to fill gap before partition %lu (tag %lu gaddr_lo %lu gaddr_hi %lu)",
     717           0 :                              part_max, i, pinfo[i].tag, pinfo[i].gaddr_lo, pinfo[i].gaddr_hi ));
     718           0 :             return FD_WKSP_ERR_CORRUPT;
     719           0 :           }
     720        6231 :           ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     721             : 
     722             :           /* Populate the acquired partition with the gap details,
     723             :              append it to the wksp partitioning and insert it into the
     724             :              free treap.  Note that stack_push/pop reset gaddr_lo,
     725             :              gaddr_hi, tag, in_same, {prev, next, left, right, same,
     726             :              parent}_cidx.  It preserved heap_prio from its original
     727             :              assignment and didn't touch stack_cidx or cycle_tag. */
     728             : 
     729        6231 :           pinfo[ k ].gaddr_lo  = g0;
     730        6231 :           pinfo[ k ].gaddr_hi  = g1;
     731        6231 :           pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     732        6231 :           *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     733        6231 :           j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     734        6231 :           j  = k;
     735        6231 :           g0 = g1;
     736             : 
     737        6231 :           fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     738        6231 :         }
     739             : 
     740             :         /* Add i to the partitioning. */
     741             : 
     742        8868 :         pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     743        8868 :         *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( i );
     744        8868 :         j_next_cidx_ptr  = &pinfo[ i ].next_cidx;
     745        8868 :         j  = i;
     746        8868 :         g0 = pinfo[ i ].gaddr_hi;
     747             : 
     748             :         /* Traverse the right subtree */
     749             : 
     750        8868 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].right_cidx );
     751             : 
     752        8868 :       } else {
     753             : 
     754             :         /* Push i to the stack and recurse on the left subtree. */
     755             : 
     756        8868 :         pinfo[ i ].stack_cidx = fd_wksp_private_pinfo_cidx( s );
     757        8868 :         s = i;
     758        8868 :         i = fd_wksp_private_pinfo_idx( pinfo[ i ].left_cidx );
     759             : 
     760        8868 :       }
     761       18951 :     }
     762             : 
     763        1215 :     if( g0 < gaddr_hi ) { /* Have final gap to fill */
     764             : 
     765             :       /* This works the same as the above */
     766             : 
     767        1215 :       if( FD_UNLIKELY( fd_wksp_private_idle_stack_is_empty( wksp ) ) ) {
     768           0 :         FD_LOG_WARNING(( "part_max (%lu) too small to complete partitioning", part_max ));
     769           0 :         return FD_WKSP_ERR_CORRUPT;
     770           0 :       }
     771        1215 :       ulong k = fd_wksp_private_idle_stack_pop( wksp, pinfo );
     772             : 
     773        1215 :       pinfo[ k ].gaddr_lo  = g0;
     774        1215 :       pinfo[ k ].gaddr_hi  = gaddr_hi;
     775        1215 :       pinfo[ k ].prev_cidx = fd_wksp_private_pinfo_cidx( j );
     776        1215 :       *j_next_cidx_ptr = fd_wksp_private_pinfo_cidx( k );
     777        1215 :       j_next_cidx_ptr  = &pinfo[ k ].next_cidx;
     778        1215 :       j  = k;
     779             :     //g0 = gaddr_hi;
     780             : 
     781        1215 :       fd_wksp_private_free_treap_insert( j, wksp, pinfo );
     782        1215 :     }
     783             : 
     784        1215 :     wksp->part_tail_cidx = fd_wksp_private_pinfo_cidx( j );
     785             : 
     786        1215 :   } while(0);
     787             : 
     788        1215 :   return FD_WKSP_SUCCESS;
     789        1215 : }
     790             : 
     791             : void
     792           0 : fd_wksp_mprotect( fd_wksp_t * wksp, int flag ) {
     793           0 :   if( FD_UNLIKELY( !wksp ) ) {
     794           0 :     FD_LOG_WARNING(( "NULL wksp" ));
     795           0 :     return;
     796           0 :   }
     797             : 
     798           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)wksp, FD_WKSP_ALIGN ) ) ) {
     799           0 :     FD_LOG_WARNING(( "bad align" ));
     800           0 :     return;
     801           0 :   }
     802             : 
     803           0 :   if( FD_UNLIKELY( wksp->magic!=FD_WKSP_MAGIC ) ) {
     804           0 :     FD_LOG_WARNING(( "bad magic" ));
     805           0 :     return;
     806           0 :   }
     807             : 
     808           0 :   ulong wksp_footprint = fd_wksp_footprint( wksp->part_max, wksp->data_max );
     809           0 :   if( FD_UNLIKELY( mprotect( (void*)wksp, wksp_footprint, ( flag ? PROT_READ : (PROT_READ|PROT_WRITE) ) ) ) ) {
     810           0 :     FD_LOG_WARNING(( "mprotect failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     811           0 :     return;
     812           0 :   }
     813           0 : }

Generated by: LCOV version 1.14