LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_checkpt_v2.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 248 306 81.0 %
Date: 2025-01-08 12:08:44 Functions: 1 1 100.0 %

          Line data    Source code
       1             : #include "fd_wksp_private.h"
       2             : 
       3             : #include <errno.h>
       4             : #include <unistd.h>
       5             : #include <fcntl.h>
       6             : #include <sys/stat.h>
       7             : 
       8             : /* This is an implementation detail and not strictly part of the v2
       9             :    specification. */
      10             : 
      11          36 : #define FD_WKSP_CHECKPT_V2_CGROUP_MAX (1024UL)
      12             : 
      13             : int
      14             : fd_wksp_private_checkpt_v2( fd_tpool_t * tpool,
      15             :                             ulong        t0,
      16             :                             ulong        t1,
      17             :                             fd_wksp_t *  wksp,
      18             :                             char const * path,
      19             :                             ulong        mode,
      20             :                             char const * uinfo,
      21          36 :                             int          frame_style_compressed ) {
      22             : 
      23          36 :   (void)tpool; (void)t0; (void)t1; /* Thread parallelization not currently implemented */
      24             : 
      25          36 :   char const * binfo = fd_log_build_info;
      26             : 
      27          36 :   if( FD_UNLIKELY( !fd_checkpt_frame_style_is_supported( frame_style_compressed ) ) ) {
      28           0 :     FD_LOG_WARNING(( "compressed frames are not supported on this target" ));
      29           0 :     return FD_WKSP_ERR_INVAL;
      30           0 :   }
      31             : 
      32          36 :   int err_fail;
      33             : 
      34          36 :   int            locked  =  0;
      35          36 :   int            fd      = -1;
      36          36 :   fd_checkpt_t * checkpt = NULL;
      37             : 
      38          36 :   fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
      39             : 
      40          36 :   char const * name     = wksp->name;
      41          36 :   ulong        name_len = fd_shmem_name_len( name );
      42          36 :   if( FD_UNLIKELY( !name_len ) ) {
      43           0 :     FD_LOG_WARNING(( "checkpt wksp to \"%s\" failed due to bad name; attempting to continue", path ));
      44           0 :     err_fail = FD_WKSP_ERR_CORRUPT;
      45           0 :     goto fail;
      46           0 :   }
      47             : 
      48             :   /* Lock the wksp */
      49             : 
      50          36 :   {
      51          36 :     int _err = fd_wksp_private_lock( wksp ); /* logs details */
      52          36 :     if( FD_UNLIKELY( _err ) ) {
      53           0 :       FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed due to being locked; attempting to continue", name, path ));
      54           0 :       err_fail = _err;
      55           0 :       goto fail;
      56           0 :     }
      57          36 :     locked = 1;
      58          36 :   }
      59             : 
      60             :   /* Determine a reasonable number of cgroups (note: in principle we
      61             :      could thread parallelize this but it probably isn't worth the extra
      62             :      complexity). */
      63             : 
      64           0 :   ulong cgroup_cnt;
      65          36 :   ulong alloc_cnt = 0UL;
      66             : 
      67          36 :   {
      68         378 : #   define WKSP_TEST( c ) do {                                                                                  \
      69         378 :       if( FD_UNLIKELY( !(c) ) ) {                                                                               \
      70           0 :         FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed due to failing test %s; attempting to continue", \
      71           0 :                          name, path, #c ));                                                                     \
      72           0 :         err_fail = FD_WKSP_ERR_CORRUPT;                                                                         \
      73           0 :         goto fail;                                                                                              \
      74           0 :       }                                                                                                         \
      75         378 :     } while(0)
      76             : 
      77             :     /* Count the number of allocations by traversing over all partitions
      78             :        in reverse order by gaddr_lo (same iteration we will do to assign
      79             :        partitions to cgroups), validating as we go. */
      80             : 
      81          36 :     ulong part_max  = wksp->part_max;
      82          36 :     ulong data_lo   = wksp->gaddr_lo;
      83          36 :     ulong data_hi   = wksp->gaddr_hi;
      84          36 :     ulong cycle_tag = wksp->cycle_tag++;
      85             : 
      86          36 :     WKSP_TEST( (0UL<data_lo) & (data_lo<=data_hi) ); /* Valid data region */
      87             : 
      88          36 :     ulong gaddr_last = data_hi;
      89             : 
      90          36 :     ulong part_idx = fd_wksp_private_pinfo_idx( wksp->part_tail_cidx );
      91         138 :     while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
      92             : 
      93             :       /* Load partition metadata and validate it */
      94             : 
      95         102 :       WKSP_TEST( part_idx<part_max );                      /* Valid idx */
      96         102 :       WKSP_TEST( pinfo[ part_idx ].cycle_tag!=cycle_tag ); /* No cycles */
      97         102 :       pinfo[ part_idx ].cycle_tag = cycle_tag;             /* Mark part_idx as visited */
      98             : 
      99         102 :       ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
     100         102 :       ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
     101         102 :       ulong tag      = pinfo[ part_idx ].tag;
     102             : 
     103         102 :       WKSP_TEST( (data_lo<=gaddr_lo) & (gaddr_lo<gaddr_hi) & (gaddr_hi==gaddr_last) ); /* Valid partition range */
     104         102 :       gaddr_last = gaddr_lo;
     105             : 
     106             :       /* If this partition holds an allocation, count it */
     107             : 
     108         102 :       alloc_cnt += (ulong)(tag>0UL);
     109             : 
     110             :       /* Advance to the previous partition */
     111             : 
     112         102 :       part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].prev_cidx );
     113         102 :     }
     114             : 
     115          36 :     WKSP_TEST( gaddr_last==data_lo ); /* Complete partitioning */
     116             : 
     117             :     /* Compute a reasonable cgroup_cnt for alloc_cnt.  To do this,
     118             :        let N be the number of allocations.  We assume they have IID
     119             :        sizes with mean U and standard deviation S.  If we assign each
     120             :        allocation to 1 of M cgroups IID uniform random (we will do
     121             :        better below but we pessimize here), in the limit of N>>M>>1, a
     122             :        cgroup's load (total number of allocation bytes assigned to a
     123             :        cgroup to compress) is Gaussian (by central limit theorem) with
     124             :        mean (N/M)U with standard deviation sqrt(N/M) sqrt(U^2+S^2).
     125             : 
     126             :        That is, we are load balanced on average (yay) but there is some
     127             :        natural imbalance expected due to statistical fluctuations (boo).
     128             :        Noting that allocation sizes are positive, if we further assume
     129             :        that S<~U typically (note that it is theoretically possible to
     130             :        have a positive valued random variable with S arbitrarily larger
     131             :        than U), then the cgroup load standard deviation is typically
     132             :        less than sqrt(2N/M) U.
     133             : 
     134             :        The load for each cgroup will be approximately independent of
     135             :        each other for M>>1.  Extremal value statistics for a Gaussian
     136             :        then implies that the least loaded cgroup is typically likely to
     137             :        have more than (N/M)U - sqrt(2N/M) U sqrt(2 ln M) load.  We want
     138             :        this to be positive such that the least loaded cgroup will
     139             :        typically have some load:
     140             : 
     141             :             (N/M)U >> sqrt((4 N ln M)/M)U
     142             :          -> (N/M)  >> 4 ln M
     143             : 
     144             :        That is, we want to pick the number of cgroups such that the number
     145             :        of allocations per cgroup on average is much greater than a few
     146             :        times the natural log of the number of cgroups.
     147             : 
     148             :        Given the number of cgroups is at most CGROUP_MAX ~ 1024, the
     149             :        above implies if we target more than ~28 allocations per cgroup
     150             :        on average, each cgroup is likely to get some load and cgroups
     151             :        will be reasonably load balanced on average.  We use 32 below for
     152             :        computational convenience. */
     153             : 
     154          36 :     cgroup_cnt = fd_ulong_min( (alloc_cnt+31UL)/32UL, FD_WKSP_CHECKPT_V2_CGROUP_MAX );
     155             : 
     156          36 : #   undef WKSP_TEST
     157          36 :   }
     158             : 
     159             :   /* Assign allocations to cgroups (note: in principle we could thread
     160             :      parallelize this but it also probably isn't worth the extra
     161             :      complexity). */
     162             : 
     163           0 :   uint  cgroup_head_cidx[ FD_WKSP_CHECKPT_V2_CGROUP_MAX ]; /* Head of a linked list for partitions assigned to each cgroup */
     164          36 :   ulong cgroup_alloc_cnt[ FD_WKSP_CHECKPT_V2_CGROUP_MAX ]; /* Number of partitions in each cgroup */
     165             : 
     166          36 :   {
     167             : 
     168             :     /* Initialize the cgroups to empty */
     169             : 
     170          36 :     ulong cgroup_load[ FD_WKSP_CHECKPT_V2_CGROUP_MAX ];
     171             : 
     172          36 :     uint null_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
     173          51 :     for( ulong cgroup_idx=0UL; cgroup_idx<cgroup_cnt; cgroup_idx++ ) {
     174          15 :       cgroup_head_cidx[ cgroup_idx ] = null_cidx;
     175          15 :       cgroup_alloc_cnt[ cgroup_idx ] = 0UL;
     176          15 :       cgroup_load     [ cgroup_idx ] = 0UL;
     177          15 :     }
     178             : 
     179             :     /* Configure cgroup sampling */
     180             : 
     181          36 :     ulong cgroup_cursor = 0UL;
     182          36 :     ulong cgroup_idx    = 0UL;
     183             : 
     184             :     /* For all partitions in reverse order by gaddr_lo */
     185             : 
     186          36 :     ulong part_idx = fd_wksp_private_pinfo_idx( wksp->part_tail_cidx );
     187         138 :     while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
     188             : 
     189             :       /* Load partition metadata */
     190             : 
     191         102 :       ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
     192         102 :       ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
     193         102 :       ulong tag      = pinfo[ part_idx ].tag;
     194             : 
     195             :       /* If this partition holds an allocation, deterministically assign
     196             :          it to a cgroup in an approximately load balanced way such that
     197             :          the assignments will be identical for the same set of
     198             :          allocations and cgroup_cnt. */
     199             : 
     200         102 :       if( tag ) { /* ~50/50 */
     201             : 
     202             :         /* Sample a handful of cgroups and pick the least loaded to
     203             :            approximate a greedy load balance method.  We consider the
     204             :            most recently assigned cgroup (which was thought to be
     205             :            lightly loaded at the previous assignment), a cyclically
     206             :            sampled cgroup (ala striping) and two pseudo-randomly sampled
     207             :            cgroups based on the common hash of gaddr_lo (ala random
     208             :            assignment).  We don't care if our samples collide; we are
     209             :            just trying to improve on load balance over straight striping
     210             :            and random sampling (both of which are already asymptotically
     211             :            load balanced as per the above).
     212             : 
     213             :            We could use a min-heap here but that would be
     214             :            algorithmically more expensive, more complex to implement and
     215             :            unlikely to improve load balance much further (it would be
     216             :            the greedy load balance method, which is also asymptotically
     217             :            optimal but not perfect ... perfect load balance is a
     218             :            computationally hard knapsack like problem but pretty good
     219             :            load balance is easy). */
     220             : 
     221          51 :         {
     222          51 :           ulong h = fd_ulong_hash( gaddr_lo );
     223             : 
     224          51 :           ulong i0 = cgroup_idx;             ulong l0 = cgroup_load[ i0 ];
     225          51 :           ulong i1 = cgroup_cursor;          ulong l1 = cgroup_load[ i1 ];
     226          51 :           ulong i2 =  h        % cgroup_cnt; ulong l2 = cgroup_load[ i2 ];
     227          51 :           ulong i3 = (h >> 32) % cgroup_cnt; ulong l3 = cgroup_load[ i3 ];
     228             : 
     229          51 :           i0 = fd_ulong_if( l0<=l1, i0, i1 ); l0 = fd_ulong_min( l0, l1 );
     230          51 :           i1 = fd_ulong_if( l2<=l3, i2, i3 ); l1 = fd_ulong_min( l2, l3 );
     231          51 :           i0 = fd_ulong_if( l0<=l1, i0, i1 ); l0 = fd_ulong_min( l0, l1 );
     232             : 
     233          51 :           cgroup_cursor = fd_ulong_if( cgroup_cursor<cgroup_cnt-1UL, cgroup_cursor+1UL, 0UL );
     234          51 :           cgroup_idx    = i0;
     235          51 :         }
     236             : 
     237             :         /* Update this cgroup's partition count and load.  The load is
     238             :            currently the total uncompressed bytes of partition metadata
     239             :            and data (TODO: consider adding a fixed base cost here to
     240             :            account for fixed computational overheads too.  This would be
     241             :            an order of magnitude ballpark of the cost of doing 2
     242             :            fd_checkpt_buf relative to the marginal cost of checkpointing
     243             :            an additional byte for some representative target ... note
     244             :            that specific target details should not be incorporated into
     245             :            this because then specific checkpt byte stream would be
     246             :            sensitive to who wrote the checkpt and ideally checkpt should
     247             :            be bit-for-bit identical for identical wksp regardless of the
     248             :            target details). */
     249             : 
     250          51 :         cgroup_alloc_cnt[ cgroup_idx ]++;
     251          51 :         cgroup_load     [ cgroup_idx ] += 3UL*sizeof(ulong) + (gaddr_hi - gaddr_lo);
     252             : 
     253             :         /* Push this partition onto the cgroup's stack.  Since we are
     254             :            iterating over partitions in reverse order by gaddr_lo, the
     255             :            stack for each cgroup can be treated as a linked list in
     256             :            sorted order by gaddr_lo (helps with metadata
     257             :            compressibility). */
     258             : 
     259          51 :         pinfo[ part_idx ].stack_cidx   = cgroup_head_cidx[ cgroup_idx ];
     260          51 :         cgroup_head_cidx[ cgroup_idx ] = fd_wksp_private_pinfo_cidx( part_idx );
     261          51 :       }
     262             : 
     263             :       /* Advance to the previous partition */
     264             : 
     265         102 :       part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].prev_cidx );
     266         102 :     }
     267          36 :   }
     268             : 
     269             :   /* At this point, all wksp partitions to checkpt have been assigned
     270             :      to a cgroup, the cgroups are approximately load balanced and the
     271             :      partitions for each cgroup are given in a singly linked list sorted
     272             :      in ascending order by gaddr_lo. */
     273             : 
     274             :   /* Create the checkpt file */
     275             : 
     276          36 :   {
     277          36 :     mode_t old_mask = umask( (mode_t)0 );
     278          36 :     fd = open( path, O_CREAT|O_EXCL|O_WRONLY, (mode_t)mode );
     279          36 :     umask( old_mask );
     280          36 :     if( FD_UNLIKELY( fd==-1 ) ) {
     281           9 :       FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed opening file with flags_O_CREAT|O_EXCL|O_WRONLY in mode 0%03lo "
     282           9 :                       "(%i-%s); attempting to continue", name, path, mode, errno, fd_io_strerror( errno ) ));
     283           9 :       err_fail = FD_WKSP_ERR_FAIL;
     284           9 :       goto fail;
     285           9 :     }
     286          36 :   }
     287             : 
     288             :   /* Initialize the checkpt */
     289             : 
     290          27 :   ulong frame_off[ FD_WKSP_CHECKPT_V2_CGROUP_MAX+6UL ];
     291          27 :   ulong frame_cnt = 0UL;
     292             : 
     293          27 :   fd_checkpt_t  _checkpt[ 1 ];
     294          27 :   uchar         wbuf[ FD_CHECKPT_WBUF_MIN ];
     295             : 
     296          27 :   checkpt = fd_checkpt_init_stream( _checkpt, fd, wbuf, FD_CHECKPT_WBUF_MIN ); /* logs details */
     297          27 :   if( FD_UNLIKELY( !checkpt ) ) {
     298           0 :     FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when initializing; attempting to continue", name, path ));
     299           0 :     err_fail = FD_WKSP_ERR_FAIL;
     300           0 :     goto fail;
     301           0 :   }
     302             : 
     303         147 : # define CHECKPT_OPEN(frame_style) do {                                                                                \
     304         147 :     int _err = fd_checkpt_open_advanced( checkpt, (frame_style), &frame_off[ frame_cnt ] );                            \
     305         147 :     if( FD_UNLIKELY( _err ) ) {                                                                                        \
     306           0 :       FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when opening a %s frame (%i-%s); attempting to continue", \
     307           0 :                        name, path, #frame_style, _err, fd_checkpt_strerror( _err ) ));                                 \
     308           0 :       err_fail = FD_WKSP_ERR_FAIL;                                                                                     \
     309           0 :       goto fail;                                                                                                       \
     310           0 :     }                                                                                                                  \
     311         147 :   } while(0)
     312             : 
     313         147 : # define CHECKPT_CLOSE() do {                                                                                       \
     314         147 :     frame_cnt++;                                                                                                    \
     315         147 :     int   _err = fd_checkpt_close_advanced( checkpt, &frame_off[ frame_cnt ] ); /* logs details */                  \
     316         147 :     if( FD_UNLIKELY( _err ) ) {                                                                                     \
     317           0 :       FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when closing a frame (%i-%s); attempting to continue", \
     318           0 :                        name, path, _err, fd_checkpt_strerror( _err ) ));                                            \
     319           0 :       err_fail = FD_WKSP_ERR_FAIL;                                                                                  \
     320           0 :       goto fail;                                                                                                    \
     321           0 :     }                                                                                                               \
     322         147 :   } while(0)
     323             : 
     324             :   /* Note: sz must be at most FD_CHECKPT_META_MAX */
     325         105 : # define CHECKPT_META( meta, sz ) do {                                                                                \
     326         105 :     ulong _sz  = (sz);                                                                                                \
     327         105 :     int   _err = fd_checkpt_meta( checkpt, (meta), _sz ); /* logs details */                                          \
     328         105 :     if( FD_UNLIKELY( _err ) ) {                                                                                       \
     329           0 :       FD_LOG_WARNING(( "checkpt to \"%s\" failed when writing %lu bytes metadata %s (%i-%s); attempting to continue", \
     330           0 :                        path, _sz, #meta, _err, fd_checkpt_strerror( _err ) ));                                        \
     331           0 :       err_fail = FD_WKSP_ERR_FAIL;                                                                                    \
     332           0 :       goto fail;                                                                                                      \
     333           0 :     }                                                                                                                 \
     334         105 :   } while(0)
     335             : 
     336             :   /* Note: data must exist and be unchanged until frame close */
     337         201 : # define CHECKPT_DATA( data, sz ) do {                                                                            \
     338         201 :     ulong _sz  = (sz);                                                                                            \
     339         201 :     int   _err = fd_checkpt_data( checkpt, (data), _sz ); /* logs details */                                      \
     340         201 :     if( FD_UNLIKELY( _err ) ) {                                                                                   \
     341           0 :       FD_LOG_WARNING(( "checkpt to \"%s\" failed when writing %lu bytes data %s (%i-%s); attempting to continue", \
     342           0 :                        path, _sz, #data, _err, fd_checkpt_strerror( _err ) ));                                    \
     343           0 :       err_fail = FD_WKSP_ERR_FAIL;                                                                                \
     344           0 :       goto fail;                                                                                                  \
     345           0 :     }                                                                                                             \
     346         201 :   } while(0)
     347             : 
     348             :   /* Checkpt the header */
     349             : 
     350          27 :   {
     351          27 :     fd_wksp_checkpt_v2_hdr_t hdr[1];
     352             : 
     353          27 :     hdr->magic                  = wksp->magic;
     354          27 :     hdr->style                  = FD_WKSP_CHECKPT_STYLE_V2;
     355          27 :     hdr->frame_style_compressed = frame_style_compressed;
     356          27 :     hdr->reserved               = 0U;
     357          27 :     memset( hdr->name, 0,    FD_SHMEM_NAME_MAX ); /* Make sure trailing zeros clear */
     358          27 :     memcpy( hdr->name, name, name_len          );
     359          27 :     hdr->seed                   = wksp->seed;
     360          27 :     hdr->part_max               = wksp->part_max;
     361          27 :     hdr->data_max               = wksp->data_max;
     362             : 
     363          27 :     CHECKPT_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
     364          27 :     CHECKPT_DATA( hdr, sizeof(fd_wksp_checkpt_v2_hdr_t) );
     365          27 :     CHECKPT_CLOSE();
     366          27 :   }
     367             : 
     368             :   /* Checkpt the info */
     369             : 
     370          27 :   {
     371          27 :     fd_wksp_checkpt_v2_info_t info[1];
     372          27 :     char                      buf[ 65536 ];
     373          27 :     char *                    p = buf;
     374             : 
     375          27 :     info->mode      = mode;
     376          27 :     info->wallclock = fd_log_wallclock();
     377          27 :     info->app_id    = fd_log_app_id   ();
     378          27 :     info->thread_id = fd_log_thread_id();
     379          27 :     info->host_id   = fd_log_host_id  ();
     380          27 :     info->cpu_id    = fd_log_cpu_id   ();
     381          27 :     info->group_id  = fd_log_group_id ();
     382          27 :     info->tid       = fd_log_tid      ();
     383          27 :     info->user_id   = fd_log_user_id  ();
     384             : 
     385         243 : #   define APPEND_CSTR( field, cstr, len ) do { \
     386         243 :       ulong _len = (len);                       \
     387         243 :       memcpy( p, (cstr), _len );                \
     388         243 :       p[ _len ] = '\0';                         \
     389         243 :       info->sz_##field = _len + 1UL;            \
     390         243 :       p += info->sz_##field;                    \
     391         243 :     } while(0)
     392             : 
     393          27 :     APPEND_CSTR( app,    fd_log_app(),    strlen( fd_log_app()    ) ); /* appends at most FD_LOG_NAME_MAX ~ 40 B */
     394          27 :     APPEND_CSTR( thread, fd_log_thread(), strlen( fd_log_thread() ) ); /* " */
     395          27 :     APPEND_CSTR( host,   fd_log_host(),   strlen( fd_log_host()   ) ); /* " */
     396          27 :     APPEND_CSTR( cpu,    fd_log_cpu(),    strlen( fd_log_cpu()    ) ); /* " */
     397          27 :     APPEND_CSTR( group,  fd_log_group(),  strlen( fd_log_group()  ) ); /* " */
     398          27 :     APPEND_CSTR( user,   fd_log_user(),   strlen( fd_log_user()   ) ); /* " */
     399          27 :     APPEND_CSTR( path,   path,            strlen( path            ) ); /* appends at most PATH_MAX-1 ~ 4 KiB */
     400          27 :     APPEND_CSTR( binfo,  binfo,           fd_cstr_nlen( binfo, FD_WKSP_CHECKPT_V2_BINFO_MAX-1UL ) ); /* appends at most 16 KiB */
     401          27 :     APPEND_CSTR( uinfo,  uinfo,           fd_cstr_nlen( uinfo, FD_WKSP_CHECKPT_V2_UINFO_MAX-1UL ) ); /* " */
     402             : 
     403          27 : #   undef APPEND_CSTR
     404             : 
     405             :     /* Write the info */
     406             : 
     407          27 :     CHECKPT_OPEN( frame_style_compressed );
     408          27 :     CHECKPT_DATA( info, sizeof(fd_wksp_checkpt_v2_info_t) );
     409          27 :     CHECKPT_DATA( buf,  (ulong)(p-buf)                    );
     410          27 :     CHECKPT_CLOSE();
     411          27 :   }
     412             : 
     413             :   /* Checkpt the volume cgroups.  Note: This implementation just
     414             :      checkpoints 1 volume with at most CGROUP_MAX cgroup_cnt groups.
     415             : 
     416             :      Note: this loop can be parallelized over multiple threads if
     417             :      willing to leave holes in the file (and then maybe do a second pass
     418             :      to compact the holes or maybe do a planning pass and then a real
     419             :      pass or maybe leave the holes and do a second pass of run length
     420             :      and entropy coding or maybe write to separate files and distribute
     421             :      as a multiple files or maybe use non-POSIX filesystem mojo to
     422             :      stitch together the separate files to appear as one file or ...) */
     423             : 
     424          39 :   for( ulong cgroup_idx=0UL; cgroup_idx<cgroup_cnt; cgroup_idx++ ) {
     425             : 
     426          12 :     CHECKPT_OPEN( frame_style_compressed );
     427             : 
     428             :     /* Write cgroup commands */
     429             : 
     430          12 :     fd_wksp_checkpt_v2_cmd_t cmd[1];
     431             : 
     432          12 :     ulong part_idx = fd_wksp_private_pinfo_idx( cgroup_head_cidx[ cgroup_idx ] );
     433          51 :     while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
     434             : 
     435             :       /* Command: "meta (tag,gaddr_lo,gaddr_hi)" */
     436             : 
     437          39 :       cmd->meta.tag      = pinfo[ part_idx ].tag;      /* Note: non-zero */
     438          39 :       cmd->meta.gaddr_lo = pinfo[ part_idx ].gaddr_lo;
     439          39 :       cmd->meta.gaddr_hi = pinfo[ part_idx ].gaddr_hi;
     440             : 
     441          39 :       CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     442             : 
     443          39 :       part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].stack_cidx );
     444          39 :     }
     445             : 
     446             :     /* Command: "corresponding data follows" */
     447             : 
     448          12 :     cmd->data.tag        = 0UL;
     449          12 :     cmd->data.cgroup_cnt = ULONG_MAX;
     450          12 :     cmd->data.frame_off  = ULONG_MAX;
     451             : 
     452          12 :     CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     453             : 
     454             :     /* Write cgroup partition data */
     455             : 
     456          12 :     part_idx = fd_wksp_private_pinfo_idx( cgroup_head_cidx[ cgroup_idx ] );
     457          51 :     while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
     458          39 :       ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
     459          39 :       ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
     460             : 
     461          39 :       CHECKPT_DATA( fd_wksp_laddr_fast( wksp, gaddr_lo ), gaddr_hi - gaddr_lo );
     462             : 
     463          39 :       part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].stack_cidx );
     464          39 :     }
     465             : 
     466          12 :     CHECKPT_CLOSE();
     467             : 
     468          12 :   }
     469             : 
     470             :   /* Checkpt the volume appendix.  This starts with a command that
     471             :      indicates this frame is an appendix for cgroup_cnt cgroups (this
     472             :      can be used in a streaming restore to tell when it has reached the
     473             :      appendix and in a parallel restore of the appendix so a parallel
     474             :      restore thread knows how much it needs to decompress), the offsets
     475             :      of each cgroup frame (so parallel restore threads can seek to the
     476             :      partitions assigned to them) and the number of partitions in each
     477             :      cgroup frame (so that the pinfo on restore can be partitioned over
     478             :      parallel restore threads upfront non-atomically and
     479             :      deterministically). */
     480             : 
     481          27 :   {
     482          27 :     fd_wksp_checkpt_v2_cmd_t cmd[1];
     483             : 
     484             :     /* Command: "appendix for a volume with cgroup_cnt frames and no
     485             :        previous volumes" */
     486             : 
     487          27 :     cmd->appendix.tag        = 0UL;
     488          27 :     cmd->appendix.cgroup_cnt = cgroup_cnt;
     489          27 :     cmd->appendix.frame_off  = 0UL;
     490             : 
     491          27 :     CHECKPT_OPEN( frame_style_compressed );
     492          27 :     CHECKPT_META( cmd,              sizeof(fd_wksp_checkpt_v2_cmd_t) ); /* Note: must be meta for restore */
     493          27 :     CHECKPT_DATA( frame_off+2UL,    cgroup_cnt*sizeof(ulong)         );
     494          27 :     CHECKPT_DATA( cgroup_alloc_cnt, cgroup_cnt*sizeof(ulong)         );
     495          27 :     CHECKPT_CLOSE();
     496          27 :   }
     497             : 
     498             :   /* Checkpt the volumes frame */
     499             : 
     500          27 :   {
     501          27 :     fd_wksp_checkpt_v2_cmd_t cmd[1];
     502             : 
     503             :     /* Command: "no more volumes" */
     504             : 
     505          27 :     cmd->volumes.tag        = 0UL;
     506          27 :     cmd->volumes.cgroup_cnt = ULONG_MAX;
     507          27 :     cmd->volumes.frame_off  = frame_off[ frame_cnt-1 ];
     508             : 
     509          27 :     CHECKPT_OPEN( frame_style_compressed );
     510          27 :     CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     511          27 :     CHECKPT_CLOSE();
     512          27 :   }
     513             : 
     514             :   /* Checkpt the footer */
     515             : 
     516          27 :   {
     517          27 :     fd_wksp_checkpt_v2_ftr_t ftr[1];
     518             : 
     519             :     /* Command: "footer for a checkpt with cgroup_cnt total cgroup
     520             :        frames" */
     521             : 
     522          27 :     ftr->alloc_cnt                   = alloc_cnt;
     523          27 :     ftr->cgroup_cnt                  = cgroup_cnt;
     524          27 :     ftr->volume_cnt                  = 1UL;
     525          27 :     ftr->frame_off                   = frame_off[ frame_cnt-1U ];
     526          27 :     ftr->checkpt_sz                  = frame_off[ frame_cnt ] + sizeof(fd_wksp_checkpt_v2_ftr_t);
     527          27 :     ftr->data_max                    = wksp->data_max;
     528          27 :     ftr->part_max                    = wksp->part_max;
     529          27 :     ftr->seed                        = wksp->seed;
     530          27 :     memset( ftr->name, 0,    FD_SHMEM_NAME_MAX ); /* Make sure trailing zeros clear */
     531          27 :     memcpy( ftr->name, name, name_len          );
     532          27 :     ftr->reserved                    = 0U;
     533          27 :     ftr->frame_style_compressed      = frame_style_compressed;
     534          27 :     ftr->style                       = FD_WKSP_CHECKPT_STYLE_V2;
     535          27 :     ftr->unmagic                     = ~wksp->magic;
     536             : 
     537          27 :     CHECKPT_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
     538          27 :     CHECKPT_DATA( ftr, sizeof(fd_wksp_checkpt_v2_ftr_t) );
     539          27 :     CHECKPT_CLOSE();
     540          27 :   }
     541             : 
     542          27 : # undef CHECKPT_DATA
     543          27 : # undef CHECKPT_META
     544          27 : # undef CHECKPT_CLOSE
     545          27 : # undef CHECKPT_OPEN
     546             : 
     547             :   /* Finalize the checkpt */
     548             : 
     549          27 :   if( FD_UNLIKELY( !fd_checkpt_fini( checkpt ) ) ) { /* logs details */
     550           0 :     FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when finalizing; attempting to continue", name, path ));
     551           0 :     checkpt  = NULL;
     552           0 :     err_fail = FD_WKSP_ERR_FAIL;
     553           0 :     goto fail;
     554           0 :   }
     555             : 
     556             :   /* Close the file */
     557             : 
     558          27 :   if( FD_UNLIKELY( close( fd ) ) ) {
     559           0 :     FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when closing; attempting to continue", name, path ));
     560           0 :     fd       = -1;
     561           0 :     err_fail = FD_WKSP_ERR_FAIL;
     562           0 :     goto fail;
     563           0 :   }
     564             : 
     565             :   /* Unlock the wksp */
     566             : 
     567          27 :   fd_wksp_private_unlock( wksp );
     568          27 :   locked = 0;
     569             : 
     570          27 :   return FD_WKSP_SUCCESS;
     571             : 
     572           9 : fail:
     573             : 
     574             :   /* Release resources that might be reserved */
     575             : 
     576           9 :   if( FD_LIKELY( checkpt ) ) {
     577           0 :     if( FD_UNLIKELY( fd_checkpt_in_frame( checkpt ) ) && FD_UNLIKELY( fd_checkpt_close( checkpt ) ) )
     578           0 :       FD_LOG_WARNING(( "fd_checkpt_close failed; attempting to continue" ));
     579             : 
     580           0 :     if( FD_UNLIKELY( !fd_checkpt_fini( checkpt ) ) ) /* logs details */
     581           0 :       FD_LOG_WARNING(( "fd_checkpt_fini failed; attempting to continue" ));
     582           0 :   }
     583             : 
     584           9 :   if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
     585           0 :     FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
     586             : 
     587           9 :   if( FD_LIKELY( locked ) ) fd_wksp_private_unlock( wksp );
     588             : 
     589           9 :   return err_fail;
     590          27 : }

Generated by: LCOV version 1.14