LCOV - code coverage report
Current view: top level - util/wksp - fd_wksp_restore_v2.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 378 584 64.7 %
Date: 2025-01-08 12:08:44 Functions: 9 10 90.0 %

          Line data    Source code
       1             : #include "fd_wksp_private.h"
       2             : 
       3             : #include <stdio.h>
       4             : #include <errno.h>
       5             : #include <unistd.h>
       6             : #include <fcntl.h>
       7             : #include <sys/stat.h>
       8             : 
       9             : /* This is an implementation detail and not strictly part of the v2
      10             :    specification. */
      11             : 
      12             : #define FD_WKSP_RESTORE_V2_CGROUP_MAX (1024UL)
      13             : 
      14             : /* Note: restore not in frame on entry, restore at off on exit.  Jumps
      15             :    to fail on error (logs details). */
      16             : 
      17         273 : #define RESTORE_SEEK(off) do {                                                          \
      18         273 :     ulong _off = (off);                                                                 \
      19         273 :     if( FD_UNLIKELY( fd_restore_seek( restore, _off ) ) ) goto fail; /* logs details */ \
      20         273 :   } while(0)
      21             : 
      22             : /* Note: restore not in frame and at start of frame on entry, restore in
      23             :    frame on exit.  Jumps to fail on error (logs details). */
      24             : 
      25         462 : #define RESTORE_OPEN(frame_style) do {                                                                                \
      26         462 :     if( FD_UNLIKELY( fd_restore_open_advanced( restore, (frame_style), &frame_off ) ) ) goto fail; /* logs details */ \
      27         462 :   } while(0)
      28             : 
      29             : /* Note: restore in frame on entry, restore just after frame on exit.
      30             :    Assumes frame fully processed.  Jumps to fail on error (logs
      31             :    details).  */
      32             : 
      33         462 : #define RESTORE_CLOSE() do {                                                                            \
      34         462 :     if( FD_UNLIKELY( fd_restore_close_advanced( restore, &frame_off ) ) ) goto fail; /* logs details */ \
      35         462 :   } while(0)
      36             : 
      37             : /* Note: restore in frame at meta and sz must be at most
      38             :    FD_RESTORE_META_MAX on entry, restore in frame at just past meta with
      39             :    meta ready on exit.  Jumps to fail on error (logs details). */
      40             : 
      41         348 : #define RESTORE_META( meta, sz ) do {                                        \
      42         348 :     ulong _sz  = (sz);                                                       \
      43         348 :     int   _err = fd_restore_meta( restore, (meta), _sz ); /* logs details */ \
      44         348 :     if( FD_UNLIKELY( _err ) ) {                                              \
      45           0 :       FD_LOG_WARNING(( "fd_restore_meta( %s, %lu ) failed (%i-%s)",          \
      46           0 :                        #meta, _sz, _err, fd_checkpt_strerror( _err ) ));     \
      47           0 :       goto fail;                                                             \
      48           0 :     }                                                                        \
      49         348 :   } while(0)
      50             : 
      51             : /* Note: restore in frame at data on entry, restore in frame just past
      52             :    data on exit, data potentially not ready until frame close and should
      53             :    exist untouched until then (logs details). */
      54             : 
      55         516 : #define RESTORE_DATA( data, sz ) do {                                        \
      56         516 :     ulong _sz  = (sz);                                                       \
      57         516 :     int   _err = fd_restore_data( restore, (data), _sz ); /* logs details */ \
      58         516 :     if( FD_UNLIKELY( _err ) ) {                                              \
      59           0 :       FD_LOG_WARNING(( "fd_restore_data( %s, %lu ) failed (%i-%s)",          \
      60           0 :                        #data, _sz, _err, fd_checkpt_strerror( _err ) ));     \
      61           0 :       goto fail;                                                             \
      62           0 :     }                                                                        \
      63         516 :   } while(0)
      64             : 
      65             : /* Note: jumps to fail if c is not true (logs details) */
      66             : 
      67        5022 : #define RESTORE_TEST( c ) do {                          \
      68        5022 :     if( FD_UNLIKELY( !(c) ) ) {                         \
      69           0 :       FD_LOG_WARNING(( "restore test %s failed", #c )); \
      70           0 :       goto fail;                                        \
      71           0 :     }                                                   \
      72        5022 :   } while(0)
      73             : 
      74             : /* fd_wksp_restore_v2_hdr restores the header frame from a wksp checkpt.
      75             :    Assumes restore is valid and at the frame start and hdr is valid.  On
      76             :    success, returns SUCCESS, *hdr will be populated with a valid data
      77             :    and restore will be just after the frame end.  On failure, returns
      78             :    FAIL, *hdr is clobbered and the caller should not assume anything
      79             :    about the restore state. */
      80             : 
      81             : static int
      82             : fd_wksp_restore_v2_hdr( fd_restore_t *             restore,
      83         126 :                         fd_wksp_checkpt_v2_hdr_t * hdr ) {
      84         126 :   ulong frame_off;
      85             : 
      86         126 :   RESTORE_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
      87         126 :   RESTORE_DATA( hdr, sizeof(fd_wksp_checkpt_v2_hdr_t) );
      88         126 :   RESTORE_CLOSE();
      89             : 
      90         126 :   ulong name_len = fd_shmem_name_len( hdr->name );
      91             :   /* FIXME: CHECK TRAILING 0 OF NAME? */
      92             : 
      93         126 :   RESTORE_TEST( hdr->magic==FD_WKSP_MAGIC                                          );
      94         126 :   RESTORE_TEST( hdr->style==FD_WKSP_CHECKPT_STYLE_V2                               );
      95         126 :   RESTORE_TEST( fd_checkpt_frame_style_is_supported( hdr->frame_style_compressed ) );
      96         126 :   RESTORE_TEST( hdr->reserved==0U                                                  );
      97         126 :   RESTORE_TEST( name_len>0UL                                                       );
      98             :   /* ignore seed (arbitrary) */
      99         126 :   RESTORE_TEST( fd_wksp_footprint( hdr->part_max, hdr->data_max )>0UL              );
     100             : 
     101         126 :   return FD_WKSP_SUCCESS;
     102             : 
     103           0 : fail:
     104           0 :   return FD_WKSP_ERR_FAIL;
     105         126 : }
     106             : 
     107             : /* fd_wksp_restore_v2_info restores the info frame from a wksp checkpt.
     108             :    Assumes restore is valid and at the frame start, hdr has info from
     109             :    the corresponding header, info_buf has room for buf_max bytes and
     110             :    info_cstr is valid.  On success, returns SUCCESS, *info will be
     111             :    populated with a valid data, info_cstr will be populated with
     112             :    pointers into info_buf to valid info cstr (indexed in the same order
     113             :    as the info fields) and restore will be just after the frame end.  On
     114             :    failure, returns FAIL, info, info_buf and info might be clobbered and
     115             :    the restore state is unknown. */
     116             : 
     117             : static int
     118             : fd_wksp_restore_v2_info( fd_restore_t *                   restore,
     119             :                          fd_wksp_checkpt_v2_hdr_t const * hdr,
     120             :                          fd_wksp_checkpt_v2_info_t *      info,
     121             :                          char *                           info_buf,
     122             :                          ulong                            info_buf_max,
     123         126 :                          char const *                     info_cstr[ 9 ] ) {
     124         126 :   ulong frame_off;
     125             : 
     126         126 :   RESTORE_OPEN( hdr->frame_style_compressed );
     127         126 :   RESTORE_META( info, sizeof(fd_wksp_checkpt_v2_info_t) );
     128         126 :   ulong info_buf_sz = info->sz_app
     129         126 :                     + info->sz_thread
     130         126 :                     + info->sz_host
     131         126 :                     + info->sz_cpu
     132         126 :                     + info->sz_group
     133         126 :                     + info->sz_user
     134         126 :                     + info->sz_path
     135         126 :                     + info->sz_binfo
     136         126 :                     + info->sz_uinfo;
     137         126 :   RESTORE_TEST( info_buf_sz<=info_buf_max );
     138         126 :   RESTORE_DATA( info_buf, info_buf_sz );
     139         126 :   RESTORE_CLOSE();
     140             : 
     141         126 :   char const * p = info_buf;
     142             : 
     143        1134 : # define NEXT( sz, max ) (__extension__({                   \
     144        1134 :     char const * _cstr = p;                                 \
     145        1134 :     ulong        _sz   = (sz);                              \
     146        1134 :     ulong        _max  = (max);                             \
     147        1134 :     RESTORE_TEST( (0UL<_sz) & (_sz<=_max) );                \
     148        1134 :     RESTORE_TEST( fd_cstr_nlen( _cstr, _max )==(_sz-1UL) ); \
     149        1134 :     p += _sz;                                               \
     150        1134 :     _cstr;                                                  \
     151        1134 :   }))
     152             : 
     153         252 :   info_cstr[0] = NEXT( info->sz_app,    FD_LOG_NAME_MAX              );
     154         252 :   info_cstr[1] = NEXT( info->sz_thread, FD_LOG_NAME_MAX              );
     155         252 :   info_cstr[2] = NEXT( info->sz_host,   FD_LOG_NAME_MAX              );
     156         252 :   info_cstr[3] = NEXT( info->sz_cpu,    FD_LOG_NAME_MAX              );
     157         252 :   info_cstr[4] = NEXT( info->sz_group,  FD_LOG_NAME_MAX              );
     158         252 :   info_cstr[5] = NEXT( info->sz_user,   FD_LOG_NAME_MAX              );
     159         252 :   info_cstr[6] = NEXT( info->sz_path,   PATH_MAX                     );
     160         252 :   info_cstr[7] = NEXT( info->sz_binfo,  FD_WKSP_CHECKPT_V2_BINFO_MAX );
     161         126 :   info_cstr[8] = NEXT( info->sz_uinfo,  FD_WKSP_CHECKPT_V2_UINFO_MAX );
     162             : 
     163           0 : # undef NEXT
     164             : 
     165         126 :   return FD_WKSP_SUCCESS;
     166             : 
     167           0 : fail:
     168           0 :   return FD_WKSP_ERR_FAIL;
     169         126 : }
     170             : 
     171             : /* fd_wksp_restore_v2_ftr restores the footer frame from a wksp checkpt.
     172             :    Assumes restore is valid and at the frame start, hdr has info from
     173             :    the corresponding hdr and ftr is valid.  On success, returns SUCCESS,
     174             :    *ftr will be populated with a valid data and restore will be just
     175             :    after the frame end.  On failure, returns FAIL, *ftr is clobbered and
     176             :    the caller should not assume anything about the restore state.
     177             : 
     178             :    IMPORTANT SAFETY TIP!  This only validates the ftr and hdr are
     179             :    compatible.  It is up to the caller to validate alloc_cnt,
     180             :    cgroup_cnt, volume_cnt, and frame_off as those may not have been
     181             :    known when hdr was written and ftr is restored. */
     182             : 
     183             : static int
     184             : fd_wksp_restore_v2_ftr( fd_restore_t *                   restore,
     185             :                         fd_wksp_checkpt_v2_hdr_t const * hdr,
     186             :                         fd_wksp_checkpt_v2_ftr_t *       ftr,
     187          63 :                         ulong                            checkpt_sz ) {
     188          63 :   ulong frame_off;
     189             : 
     190          63 :   RESTORE_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
     191          63 :   RESTORE_DATA( ftr, sizeof(fd_wksp_checkpt_v2_ftr_t) );
     192          63 :   RESTORE_CLOSE();
     193             : 
     194          63 :   RESTORE_TEST( frame_off      ==checkpt_sz );
     195          63 :   RESTORE_TEST( ftr->checkpt_sz==checkpt_sz );
     196             : 
     197          63 :   RESTORE_TEST( ftr->data_max                        ==hdr->data_max               );
     198          63 :   RESTORE_TEST( ftr->part_max                        ==hdr->part_max               );
     199          63 :   RESTORE_TEST( ftr->seed                            ==hdr->seed                   );
     200          63 :   RESTORE_TEST( !memcmp( ftr->name, hdr->name, FD_SHMEM_NAME_MAX )                 );
     201          63 :   RESTORE_TEST( ftr->reserved                        ==hdr->reserved               );
     202          63 :   RESTORE_TEST( ftr->frame_style_compressed          ==hdr->frame_style_compressed );
     203          63 :   RESTORE_TEST( ftr->style                           ==hdr->style                  );
     204          63 :   RESTORE_TEST( ftr->unmagic                         ==~hdr->magic                 );
     205             : 
     206          63 :   return FD_WKSP_SUCCESS;
     207             : 
     208           0 : fail:
     209           0 :   return FD_WKSP_ERR_FAIL;
     210          63 : }
     211             : 
     212             : /* fd_wksp_private_restore_v2_common does the common parts of a
     213             :    streaming and a parallel wksp restore (restores the header and info
     214             :    frames and pretty prints them to the log).  Assumes wksp and restore
     215             :    are valid and restore is on the first header byte.  On success,
     216             :    returns SUCCESS and the restore will have processed the header and
     217             :    info frames and will be positioned just after the info frame.  On
     218             :    failure, returns FAIL and restore and hdr will be in an indeterminant
     219             :    state. */
     220             : 
     221             : static int
     222             : fd_wksp_private_restore_v2_common( fd_wksp_checkpt_v2_hdr_t * hdr,
     223          63 :                                    fd_restore_t *             restore ) {
     224             : 
     225          63 :   FD_LOG_INFO(( "Restoring header and info (v2 frames 0:1)" ));
     226             : 
     227          63 :   RESTORE_TEST( !fd_wksp_restore_v2_hdr( restore, hdr ) );
     228             : 
     229          63 :   fd_wksp_checkpt_v2_info_t info[1];
     230          63 :   char                      info_buf[ 65536 ];
     231          63 :   char const *              info_cstr[9];
     232             : 
     233          63 :   RESTORE_TEST( !fd_wksp_restore_v2_info( restore, hdr, info, info_buf, 65536UL, info_cstr ) );
     234             : 
     235             :   /* Note: this mirrors printf below */
     236             : 
     237          63 :   char info_wallclock[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
     238          63 :   fd_log_wallclock_cstr( info->wallclock, info_wallclock );
     239             : 
     240          63 :   FD_LOG_INFO(( "\n"
     241          63 :                 "\tstyle                  %-20i\n"       /* verbose 0 info */
     242          63 :                 "\tname                   %s\n"
     243          63 :                 "\tseed                   %-20u\n"
     244          63 :                 "\tpart_max               %-20lu\n"
     245          63 :                 "\tdata_max               %-20lu\n"
     246          63 :                 "\tmagic                  %016lx\n"      /* verbose 1 info */
     247          63 :                 "\twallclock              %-20li (%s)\n"
     248          63 :                 "\tapp                    %-20lu (%s)\n"
     249          63 :                 "\tthread                 %-20lu (%s)\n"
     250          63 :                 "\thost                   %-20lu (%s)\n"
     251          63 :                 "\tcpu                    %-20lu (%s)\n"
     252          63 :                 "\tgroup                  %-20lu (%s)\n"
     253          63 :                 "\ttid                    %-20lu\n"
     254          63 :                 "\tuser                   %-20lu (%s)\n"
     255          63 :                 "\tframe_style_compressed %-20i\n"       /* (v2 specific) */
     256          63 :                 "\tmode                   %03lo",        /* (v2 specific) */
     257          63 :                 hdr->style, hdr->name, hdr->seed, hdr->part_max, hdr->data_max,
     258          63 :                 hdr->magic, info->wallclock, info_wallclock,
     259          63 :                 info->app_id,    info_cstr[0],
     260          63 :                 info->thread_id, info_cstr[1],
     261          63 :                 info->host_id,   info_cstr[2],
     262          63 :                 info->cpu_id,    info_cstr[3],
     263          63 :                 info->group_id,  info_cstr[4],
     264          63 :                 info->tid,
     265          63 :                 info->user_id,   info_cstr[5],
     266          63 :                 hdr->frame_style_compressed,
     267          63 :                 info->mode ));
     268             : 
     269             :   /* The below info cstr are potentially long enough to be truncated by
     270             :      the logger.  So we break them into separate log messages to log as
     271             :      much detail as possible. */
     272             : 
     273          63 :   FD_LOG_INFO(( "path\n\t\t%s",  info_cstr[6] )); /* verbose 2 info (v2 specific) */
     274          63 :   FD_LOG_INFO(( "binfo\n\t\t%s", info_cstr[7] )); /* verbose 2 info */
     275          63 :   FD_LOG_INFO(( "uinfo\n\t\t%s", info_cstr[8] )); /* verbose 2 info */
     276             : 
     277          63 :   return FD_WKSP_SUCCESS;
     278             : 
     279           0 : fail:
     280           0 :   return FD_WKSP_ERR_FAIL;
     281          63 : }
     282             : 
     283             : /* fd_wksp_private_restore_v2_cgroup restores a cgroup's allocation into
     284             :    wksp.  hdr contains the corresponding restore header info, frame_off
     285             :    is where the cgroup frame to restore is located and partitions
     286             :    [part_lo,part_hi) are the wksp partition indices to use for this
     287             :    frame's allocations.  Assumes all inputs have already been validated.
     288             :    Returns SUCCESS (0) on success and FAIL (negative) on failure.  On
     289             :    return, in both cases, *_dirty will be 1/0 if wksp was/was not
     290             :    modified.  On error, the restore state is indeterminant. */
     291             : 
     292             : static int
     293             : fd_wksp_private_restore_v2_cgroup( fd_wksp_t *                      wksp,
     294             :                                    fd_restore_t *                   restore,
     295             :                                    fd_wksp_checkpt_v2_hdr_t const * hdr,
     296             :                                    ulong                            frame_off_lo,
     297             :                                    ulong                            frame_off_hi,
     298             :                                    ulong                            part_lo,
     299             :                                    ulong                            part_hi,
     300          21 :                                    int *                            _dirty ) {
     301          21 :   int dirty = 0;
     302             : 
     303          21 :   fd_wksp_private_pinfo_t * pinfo    = fd_wksp_private_pinfo( wksp );
     304          21 :   ulong                     data_lo  = wksp->gaddr_lo;
     305          21 :   ulong                     data_hi  = wksp->gaddr_hi;
     306             : 
     307          21 :   ulong hdr_data_lo = fd_wksp_private_data_off( hdr->part_max );
     308          21 :   ulong hdr_data_hi = hdr_data_lo + hdr->data_max;
     309             : 
     310          21 :   ulong frame_off;
     311          21 :   RESTORE_SEEK( frame_off_lo );
     312          21 :   RESTORE_OPEN( hdr->frame_style_compressed );
     313             : 
     314             :   /* For all cgroup allocation metadata */
     315             : 
     316          21 :   fd_wksp_checkpt_v2_cmd_t cmd[1];
     317             : 
     318          96 :   for( ulong part_idx=part_lo; part_idx<part_hi; part_idx++ ) {
     319             : 
     320          75 :     RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     321          75 :     RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_meta( cmd ) );
     322             : 
     323          75 :     ulong tag      = cmd->meta.tag;      /* non-zero */
     324          75 :     ulong gaddr_lo = cmd->meta.gaddr_lo;
     325          75 :     ulong gaddr_hi = cmd->meta.gaddr_hi;
     326             : 
     327          75 :     RESTORE_TEST( (hdr_data_lo<=gaddr_lo) & (gaddr_lo<gaddr_hi) & (gaddr_hi<=hdr_data_hi) );
     328             :     /* Note: disjoint [gaddr_lo,gaddr_hi) tested on rebuild */
     329             : 
     330          75 :     if( FD_UNLIKELY( !((data_lo<=gaddr_lo) & (gaddr_hi<=data_hi)) ) ) {
     331           0 :       FD_LOG_WARNING(( "restore failed because checkpt partition [0x%016lx,0x%016lx) tag %lu does not fit into current "
     332           0 :                        "wksp data region [0x%016lx,0x%016lx) (data_max checkpt %lu, wksp %lu)",
     333           0 :                        gaddr_lo, gaddr_hi, tag, data_lo, data_hi, hdr->data_max, wksp->data_max ));
     334           0 :       goto fail;
     335           0 :     }
     336             : 
     337          75 :     dirty = 1;
     338          75 :     pinfo[ part_idx ].gaddr_lo = gaddr_lo;
     339          75 :     pinfo[ part_idx ].gaddr_hi = gaddr_hi;
     340          75 :     pinfo[ part_idx ].tag      = tag;
     341          75 :   }
     342             : 
     343             :   /* Restore the data command */
     344             : 
     345          21 :   RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     346          21 :   RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_data( cmd ) );
     347             : 
     348             :   /* For all cgroup allocation data */
     349             : 
     350          96 :   for( ulong part_idx=part_lo; part_idx<part_hi; part_idx++ ) {
     351          75 :     ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
     352          75 :     ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
     353             : 
     354             :     /* Restore the allocation into the wksp data region */
     355             : 
     356          75 :     dirty = 1;
     357          75 :     RESTORE_DATA( fd_wksp_laddr_fast( wksp, gaddr_lo ), gaddr_hi - gaddr_lo );
     358          75 :   }
     359             : 
     360             :   /* Close the frame */
     361             : 
     362          21 :   RESTORE_CLOSE();
     363             : 
     364          21 :   RESTORE_TEST( (frame_off_lo<frame_off) & (frame_off<=frame_off_hi) ); /* == hi if compactly stored */
     365             : 
     366          21 :   *_dirty = dirty;
     367          21 :   return FD_WKSP_SUCCESS;
     368             : 
     369           0 : fail:
     370           0 :   *_dirty = dirty;
     371           0 :   return FD_WKSP_ERR_FAIL;
     372          21 : }
     373             : 
     374             : /* fd_wksp_private_restore_v2_node dispatches cgroup restore work to
     375             :    tpool threads [t0,t1).  If any errors were encountered while
     376             :    restoring cgroups, returns the first error encountered on the lowest
     377             :    indexed thread in the int location pointed to by _err.  If any
     378             :    modifications were made to wksp (whether or not there were errors),
     379             :    the int location pointed to by _dirty will be set to 1.  Assumes
     380             :    caller is thread t0 and threads (t0,t1) are available.  Note that we
     381             :    could do this with FD_MAP_REDUCE but FD_MAP_REDUCE assumes that
     382             :    fd_scratch space is available and we can't guarantee that here.
     383             :    Likewise, we could use tpool_exec_all with a TASKQ model but
     384             :    reduction of results is less efficient. */
     385             : 
     386             : static void
     387             : fd_wksp_private_restore_v2_node( void * tpool,
     388             :                                  ulong  tpool_t0,
     389             :                                  ulong  tpool_t1,          /* Assumes t1>t0 */
     390             :                                  void * _wksp,
     391             :                                  void * _restore,
     392             :                                  ulong  _hdr,
     393             :                                  ulong  _cgroup_frame_off,
     394             :                                  ulong  _cgroup_pinfo_lo,
     395             :                                  ulong  _cgroup_nxt,
     396             :                                  ulong  cgroup_cnt,
     397             :                                  ulong  _err,
     398          63 :                                  ulong  _dirty ) {
     399             : 
     400             :   /* This node is responsible for threads [t0,t1).  If this range has
     401             :      more than one thread, split the range into left and right halves,
     402             :      have the first right half thread handle the right half, use this
     403             :      thread to handle the left half and then reduce the results from
     404             :      the two halves. */
     405             : 
     406          63 :   ulong tpool_cnt = tpool_t1 - tpool_t0;
     407          63 :   if( tpool_cnt>1UL ) {
     408           0 :     ulong tpool_ts = tpool_t0 + fd_tpool_private_split( tpool_cnt );
     409             : 
     410           0 :     int err0; int dirty0;
     411           0 :     int err1; int dirty1;
     412             : 
     413           0 :     fd_tpool_exec( tpool, tpool_ts, fd_wksp_private_restore_v2_node,
     414           0 :                    tpool, tpool_ts, tpool_t1, _wksp, _restore, _hdr, _cgroup_frame_off, _cgroup_pinfo_lo, _cgroup_nxt, cgroup_cnt,
     415           0 :                    (ulong)&err1, (ulong)&dirty1 );
     416           0 :     fd_wksp_private_restore_v2_node(
     417           0 :                    tpool, tpool_t0, tpool_ts, _wksp, _restore, _hdr, _cgroup_frame_off, _cgroup_pinfo_lo, _cgroup_nxt, cgroup_cnt,
     418           0 :                    (ulong)&err0, (ulong)&dirty0 );
     419           0 :     fd_tpool_wait( tpool, tpool_ts );
     420             : 
     421           0 :     *(int *)_err   = fd_int_if( !!err0, err0, err1 ); /* Return first error encountered */
     422           0 :     *(int *)_dirty = dirty0 | dirty1;                 /* Accumulate the dirty flag */
     423           0 :     return;
     424           0 :   }
     425             : 
     426             :   /* This node is responsible for a single thread.  Unpack the input
     427             :      arguments. */
     428             : 
     429          63 :   fd_wksp_t *                      wksp             = (fd_wksp_t *)               _wksp;
     430          63 :   fd_restore_t *                   restore          = (fd_restore_t *)            _restore; /* FIXME: CLONE RESTORE */
     431          63 :   fd_wksp_checkpt_v2_hdr_t const * hdr              = (fd_wksp_checkpt_v2_hdr_t *)_hdr;
     432          63 :   ulong const *                    cgroup_frame_off = (ulong *)                   _cgroup_frame_off;
     433          63 :   ulong const *                    cgroup_pinfo_lo  = (ulong *)                   _cgroup_pinfo_lo;
     434             : 
     435          63 :   int err   = FD_WKSP_SUCCESS;
     436          63 :   int dirty = 0;
     437             : 
     438             :   /* Since we can't have multiple threads operate concurrently on the
     439             :      same restore object, make a new restore object safe for use by this
     440             :      thread (technically could use restore directly on original thread
     441             :      t0). */
     442             : 
     443          63 :   fd_restore_t _restore_local[1];
     444          63 :   fd_restore_t * restore_local =
     445          63 :     fd_restore_init_mmio( _restore_local, fd_restore_mmio( restore ), fd_restore_mmio_sz( restore ) ); /* logs details */
     446          63 :   if( FD_UNLIKELY( !restore_local ) ) {
     447           0 :     err = FD_WKSP_ERR_FAIL;
     448           0 :     goto done;
     449           0 :   }
     450             : 
     451          84 :   for(;;) {
     452             : 
     453             :     /* Get the next cgroup to restore.  Use a dynamic task queue model
     454             :        here because we assume that restore a single cgroups requires a
     455             :        large amount of work and the amount of work is highly variable.
     456             :        Note that using an atomic increment for the cgroup_nxt counter
     457             :        assumes:
     458             : 
     459             :          cgroup_cnt << ULONG_MAX - TILE_MAX.
     460             : 
     461             :        We could use a slower atomic CAS based version instead if we want
     462             :        to insure that cgroup_nxt is never incremented beyond cgroup_cnt.
     463             :        We could also use a block partitioning or CUDA style striping if
     464             :        wanting to do a deterministic distribution but these might not
     465             :        load balance as well in various extreme circumstances. */
     466             : 
     467          84 : #   if FD_HAS_ATOMIC
     468          84 :     FD_COMPILER_MFENCE();
     469          84 :     ulong cgroup_idx = FD_ATOMIC_FETCH_AND_ADD( (ulong *)_cgroup_nxt, 1UL );
     470          84 :     FD_COMPILER_MFENCE();
     471             : #   else /* Note: this assumes platforms without HAS_ATOMIC will not be running this multithreaded */
     472             :     ulong cgroup_idx = (*(ulong *)_cgroup_nxt) + 1UL;
     473             : #   endif
     474             : 
     475          84 :     if( FD_UNLIKELY( cgroup_idx>=cgroup_cnt ) ) break; /* No more cgroups to process */
     476             : 
     477             :     /* Restore this cgroup */
     478             : 
     479          21 :     int dirty_cgroup;
     480          21 :     err = fd_wksp_private_restore_v2_cgroup( wksp, restore_local, hdr,
     481          21 :                                              cgroup_frame_off[ cgroup_idx ], cgroup_frame_off[ cgroup_idx+1UL ],
     482          21 :                                              cgroup_pinfo_lo [ cgroup_idx ], cgroup_pinfo_lo [ cgroup_idx+1UL ],
     483          21 :                                              &dirty_cgroup ); /* logs details */
     484          21 :     dirty |= dirty_cgroup;
     485          21 :     if( FD_UNLIKELY( err ) ) break; /* abort if we encountered an error */
     486             : 
     487          21 :   }
     488             : 
     489          63 :   fd_restore_fini( restore_local );
     490             : 
     491          63 : done:
     492          63 :   *(int *)_err   = err;
     493          63 :   *(int *)_dirty = dirty;
     494          63 : }
     495             : 
     496             : /* fd_wksp_private_restore_v2_mmio replaces all the allocations in a
     497             :    wksp with the allocations in the restore.  Assumes all inputs are
     498             :    valid, restore is positioned on the first byte of the header, has
     499             :    the given size and is seekable.  Returns SUCCESS on success and the
     500             :    restore will be positioned just after the footer.  Returns FAIL if an
     501             :    error occurred before wksp was modified and CORRUPT if an error
     502             :    occurred after.  On failure, the restore state is indeterminate.
     503             :    Uses tpool threads [t0,t1) to do the restore.  Assumes the caller is
     504             :    thread t0 and threads (t0,t1) are available for dispatch. */
     505             : 
     506             : static int
     507             : fd_wksp_private_restore_v2_mmio( fd_tpool_t *   tpool,
     508             :                                  ulong          t0,
     509             :                                  ulong          t1,
     510             :                                  fd_wksp_t *    wksp,
     511             :                                  fd_restore_t * restore,
     512          63 :                                  uint           new_seed ) {
     513             :   /* Seekable restore: validate hdr/info/ftr, walk the volume appendices from last to first, restore cgroups, rebuild wksp */
     514          63 :   ulong frame_off; /* frame offset scratch written by the RESTORE_OPEN macro (tested after RESTORE_CLOSE below) */
     515             : 
     516          63 :   int locked = 0; /* is the wksp currently locked? */
     517          63 :   int dirty  = 0; /* has the wksp been modified? */
     518             : 
     519             :   /* Restore and validate the header, info, and footer.  In principle
     520             :      this could be parallelized but probably not worth it. */
     521             : 
     522          63 :   ulong restore_sz = fd_restore_sz( restore );
     523             : 
     524          63 :   ulong frame_off_hdr  = 0UL;
     525          63 :   ulong frame_off_info = frame_off_hdr + sizeof(fd_wksp_checkpt_v2_hdr_t);
     526          63 :   ulong frame_off_ftr  = restore_sz    - sizeof(fd_wksp_checkpt_v2_ftr_t);
     527             : 
     528          63 :   RESTORE_TEST( /*(0UL<=frame_off_hdr) &*/ (frame_off_hdr<frame_off_info) & (frame_off_info<frame_off_ftr) & (frame_off_ftr<restore_sz) );
     529             : 
     530          63 :   fd_wksp_checkpt_v2_hdr_t hdr[1];
     531             : 
     532             : //RESTORE_SEEK( frame_off_hdr );
     533          63 :   RESTORE_TEST( !fd_wksp_private_restore_v2_common( hdr, restore ) );
     534             : 
     535          63 :   FD_LOG_INFO(( "Restoring footer" ));
     536             : 
     537          63 :   fd_wksp_checkpt_v2_ftr_t ftr[1];
     538             : 
     539          63 :   RESTORE_SEEK( frame_off_ftr );
     540          63 :   RESTORE_TEST( !fd_wksp_restore_v2_ftr( restore, hdr, ftr, restore_sz ) );
     541             : 
     542          63 :   ulong frame_off_volumes = ftr->frame_off;
     543             : 
     544          63 :   RESTORE_TEST( (frame_off_info<frame_off_volumes) & (frame_off_volumes<frame_off_ftr) );
     545             : 
     546          63 :   if( FD_UNLIKELY( ftr->alloc_cnt>wksp->part_max ) ) {
     547           0 :     FD_LOG_WARNING(( "restore failed because there are too few wksp partitions to restore allocations into "
     548           0 :                      "(ftr alloc_cnt %lu, hdr part_max %lu, wksp part_max %lu)",
     549           0 :                      ftr->alloc_cnt, hdr->part_max, wksp->part_max ));
     550           0 :     goto fail;
     551           0 :   }
     552             : 
     553          63 :   FD_LOG_INFO(( "Restoring volumes" ));
     554             : 
     555          63 :   fd_wksp_checkpt_v2_cmd_t cmd[1];
     556             : 
     557          63 :   RESTORE_SEEK( frame_off_volumes );
     558          63 :   RESTORE_OPEN( hdr->frame_style_compressed );
     559          63 :   RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     560          63 :   RESTORE_CLOSE();
     561             : 
     562          63 :   RESTORE_TEST( (cmd->volumes.tag==0UL) & (cmd->volumes.cgroup_cnt==ULONG_MAX) ); /* frame_off_appendix tested below */
     563          63 :   RESTORE_TEST( (frame_off_volumes<frame_off) & (frame_off<=frame_off_ftr) );     /* ==frame_off_ftr if compactly stored */
     564             : 
     565          63 :   FD_LOG_INFO(( "Locking wksp" ));
     566             : 
     567          63 :   if( FD_UNLIKELY( fd_wksp_private_lock( wksp ) ) ) goto fail; /* logs details */
     568          63 :   locked = 1;
     569             : 
     570             :   /* For all volumes */
     571             : 
     572          63 :   ulong alloc_rem  = ftr->alloc_cnt;  /* Number of allocations remaining to process */
     573          63 :   ulong cgroup_rem = ftr->cgroup_cnt; /* Number of cgroups     remaining to process */
     574          63 :   ulong volume_rem = ftr->volume_cnt; /* Number of volumes     remaining to process */
     575             : 
     576          63 :   ulong frame_off_volume_lo = frame_off_info;
     577          63 :   ulong frame_off_volume_hi = frame_off_volumes;
     578          63 :   ulong frame_off_appendix  = cmd->volumes.frame_off;
     579             : 
     580         126 :   while( frame_off_appendix ) {
     581             : 
     582             :     /* Verify we still have volumes remaining and the appendix location
     583             :        is between the info frame and the next volume (or the footer if
     584             :        the last volume) */
     585             : 
     586          63 :     RESTORE_TEST( (volume_rem>0UL) & (frame_off_volume_lo<frame_off_appendix) & (frame_off_appendix<frame_off_volume_hi) );
     587             : 
     588             :     /* Now that we know where this volume's appendix is supposed to be,
     589             :        seek to it and then restore and validate it. */
     590             : 
     591          63 :     FD_LOG_INFO(( "Restoring volume appendix" ));
     592             : 
     593          63 :     RESTORE_SEEK( frame_off_appendix );
     594             : 
     595          63 :     ulong cgroup_frame_off[ FD_WKSP_RESTORE_V2_CGROUP_MAX+1UL ];
     596          63 :     ulong cgroup_pinfo_lo [ FD_WKSP_RESTORE_V2_CGROUP_MAX+1UL ];
     597          63 :     ulong cgroup_cnt;
     598             : 
     599          63 :     ulong frame_off_prev;
     600             : 
     601          63 :     {
     602          63 :       RESTORE_OPEN( hdr->frame_style_compressed );
     603             : 
     604          63 :       fd_wksp_checkpt_v2_cmd_t cmd[1];
     605             : 
     606          63 :       RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     607          63 :       RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_appendix( cmd ) );
     608             : 
     609          63 :       cgroup_cnt     = cmd->appendix.cgroup_cnt;
     610          63 :       frame_off_prev = cmd->appendix.frame_off;
     611             : 
     612          63 :       if( FD_UNLIKELY( cgroup_cnt>FD_WKSP_RESTORE_V2_CGROUP_MAX ) ) {
     613           0 :         FD_LOG_WARNING(( "increase FD_WKSP_RESTORE_V2_CGROUP_MAX for this target" ));
     614           0 :         goto fail;
     615           0 :       }
     616             : 
     617          63 :       RESTORE_DATA( cgroup_frame_off, cgroup_cnt*sizeof(ulong) );
     618          63 :       RESTORE_DATA( cgroup_pinfo_lo,  cgroup_cnt*sizeof(ulong) ); /* cgroup_alloc_cnt now, pinfo cgroup partitioning later */
     619          63 :       RESTORE_CLOSE();
     620             : 
     621             :       /* Verify this volume's cgroup frames are between the previous
     622             :          appendix frame (or the info frame if the first volume) and this
     623             :          appendix frame and ordered.  Also, verify the cgroup allocation counts,
     624             :          convert the counts into a partitioning of wksp's pinfo array
     625             :          and make sure we have enough partitions in the wksp to attempt
     626             :          the restore.  In principle, this loop could be parallelized but
     627             :          probably not worth it. */
     628             : 
     629          63 :       cgroup_frame_off[ cgroup_cnt ] = frame_off_appendix; /* sentinel: the last cgroup's frame range ends at the appendix frame */
     630          63 :       cgroup_pinfo_lo [ cgroup_cnt ] = alloc_rem;          /* sentinel: the last cgroup's pinfo range ends at alloc_rem */
     631             : 
     632          84 :       for( ulong cgroup_rem=cgroup_cnt; cgroup_rem; cgroup_rem-- ) { /* NOTE: loop-local cgroup_rem shadows the function-scope cgroup_rem */
     633             : 
     634          21 :         ulong cgroup_idx = cgroup_rem - 1UL;
     635          21 :         RESTORE_TEST( cgroup_frame_off[ cgroup_idx ] < cgroup_frame_off[ cgroup_idx+1UL ] );
     636             : 
     637          21 :         ulong cgroup_alloc_cnt = cgroup_pinfo_lo[ cgroup_idx ];
     638          21 :         RESTORE_TEST( cgroup_alloc_cnt<=alloc_rem );
     639          21 :         alloc_rem -= cgroup_alloc_cnt;
     640          21 :         cgroup_pinfo_lo[ cgroup_idx ] = alloc_rem;
     641             : 
     642          21 :       }
     643             : 
     644          63 :       RESTORE_TEST( fd_ulong_max( frame_off_prev, frame_off_info ) < cgroup_frame_off[0] );
     645          63 :     }
     646             : 
     647             :     /* At this point, we know how to do an embarrassingly parallel
     648             :        restore directly into the wksp.  Dispatch work to tpool threads
     649             :        [t0,t1).  This assumes we are tpool thread t0 and threads (t0,t1)
     650             :        are available for dispatch.  On return from the dispatch, err
     651             :        will contain the error code from the lowest indexed cgroup_idx
     652             :        that encountered an error (if any error was encountered, some
     653             :        cgroups might not have been processed) and dirty_node will
     654             :        contain non-zero if the wksp was modified. */
     655             : 
     656          63 :     FD_LOG_INFO(( "Restoring volume cgroups" ));
     657             : 
     658          63 :     ulong cgroup_nxt[1];
     659             : 
     660          63 :     FD_COMPILER_MFENCE();
     661          63 :     FD_VOLATILE( cgroup_nxt[0] ) = 0UL;
     662          63 :     FD_COMPILER_MFENCE();
     663             : 
     664          63 :     int err;
     665          63 :     int dirty_node;
     666          63 :     fd_wksp_private_restore_v2_node( (void *)tpool, t0, t1,
     667          63 :                                      (void *)wksp, (void *)restore, (ulong)hdr, (ulong)cgroup_frame_off, (ulong)cgroup_pinfo_lo,
     668          63 :                                      (ulong)cgroup_nxt, cgroup_cnt, (ulong)&err, (ulong)&dirty_node );
     669          63 :     dirty |= dirty_node;
     670          63 :     if( FD_UNLIKELY( err ) ) goto fail;
     671             : 
     672             :     /* Advance to the next volume */
     673             : 
     674          63 :     cgroup_rem -= cgroup_cnt;
     675          63 :     volume_rem--;
     676             :     /* frame_off_volume_lo unchanged */
     677          63 :     frame_off_volume_hi = cgroup_frame_off[ 0 ];
     678          63 :     frame_off_appendix  = frame_off_prev;
     679          63 :   }
     680             : 
     681             :   /* Make sure we got all volumes and all cgroups and position the
     682             :      restore at the location it would have been at in a streaming
     683             :      restore. */
     684             : 
     685          63 :   RESTORE_TEST( alloc_rem ==0UL );
     686          63 :   RESTORE_TEST( cgroup_rem==0UL );
     687          63 :   RESTORE_TEST( volume_rem==0UL );
     688             : 
     689          63 :   RESTORE_SEEK( restore_sz );
     690             : 
     691             :   /* Free any remaining old allocations and rebuild the wksp with our
     692             :      freshly restored allocations.  In principle the free loop can be
     693             :      parallelized but it is probably not worth it. */
     694             : 
     695          63 :   FD_LOG_INFO(( "Rebuilding wksp" ));
     696             : 
     697          63 :   dirty = 1; /* any failure from here on is reported as CORRUPT by the fail path */
     698             : 
     699          63 :   fd_wksp_private_pinfo_t * pinfo    = fd_wksp_private_pinfo( wksp );
     700          63 :   ulong                     part_max = wksp->part_max;
     701             : 
     702     1031172 :   for( ulong part_idx=ftr->alloc_cnt; part_idx<part_max; part_idx++ ) pinfo[ part_idx ].tag = 0UL;
     703             : 
     704          63 :   if( FD_UNLIKELY( fd_wksp_rebuild( wksp, new_seed ) ) ) goto fail; /* logs details */
     705             : 
     706          63 :   FD_LOG_INFO(( "Unlocking wksp" ));
     707             : 
     708          63 :   fd_wksp_private_unlock( wksp );
     709             : 
     710          63 :   return FD_WKSP_SUCCESS;
     711             : 
     712           0 : fail: /* Release resources that might be reserved */
     713             : 
     714           0 :   if( FD_LIKELY( locked ) ) fd_wksp_private_unlock( wksp );
     715             : 
     716           0 :   return fd_int_if( dirty, FD_WKSP_ERR_CORRUPT, FD_WKSP_ERR_FAIL );
     717          63 : }
     718             : 
     719             : /* fd_wksp_private_restore_v2_stream is identical to
     720             :    fd_wksp_private_restore_v2_mmio (above) but usable when restore is
     721             :    not using memory mapped i/o under the hood.  This includes when the
     722             :    restore is from a non-seekable file descriptor (e.g. when the restore
     723             :    is from a pipe or socket but this will work fine if used on mmio
     724             :    restores too).  Restore must be compactly stored.  Exact same
     725             :    behaviors. */
     726             : 
     727             : static int
     728             : fd_wksp_private_restore_v2_stream( fd_wksp_t *    wksp,
     729             :                                    fd_restore_t * restore,
     730           0 :                                    uint           new_seed ) {
     731           0 :   ulong frame_off; /* frame offset scratch written by the RESTORE_OPEN macro */
     732             : 
     733           0 :   int locked = 0; /* is the wksp currently locked? */
     734           0 :   int dirty  = 0; /* has the wksp been modified? */
     735             : 
     736           0 :   fd_wksp_checkpt_v2_hdr_t hdr[1];
     737             : 
     738           0 :   RESTORE_TEST( !fd_wksp_private_restore_v2_common( hdr, restore ) );
     739             : 
     740           0 :   FD_LOG_INFO(( "Locking wksp" ));
     741             : 
     742           0 :   if( FD_UNLIKELY( fd_wksp_private_lock( wksp ) ) ) goto fail; /* logs details */
     743           0 :   locked = 1;
     744             : 
     745           0 :   fd_wksp_private_pinfo_t * pinfo    = fd_wksp_private_pinfo( wksp );
     746           0 :   ulong                     part_max = wksp->part_max;
     747           0 :   ulong                     data_max = wksp->data_max;
     748           0 :   ulong                     data_lo  = wksp->gaddr_lo;
     749           0 :   ulong                     data_hi  = wksp->gaddr_hi;
     750             : 
     751           0 :   ulong hdr_data_max = hdr->data_max;
     752           0 :   ulong hdr_data_lo  = fd_wksp_private_data_off( hdr->part_max );
     753           0 :   ulong hdr_data_hi  = hdr_data_lo + hdr_data_max;
     754             : 
     755             :   /* For all volumes in the checkpt */
     756             : 
     757           0 :   ulong ftr_alloc_cnt  = 0UL;
     758           0 :   ulong ftr_cgroup_cnt = 0UL;
     759           0 :   ulong ftr_volume_cnt = 0UL;
     760           0 :   ulong frame_off_prev = 0UL; /* frame offset of the most recent appendix / volumes frame seen (0UL before the first) */
     761             : 
     762           0 :   for(;;) {
     763             : 
     764           0 :     FD_LOG_INFO(( "Restoring volume %lu", ftr_volume_cnt ));
     765             : 
     766           0 :     ulong vol_cgroup_cnt = 0UL;
     767             : 
     768           0 :     ulong vol_cgroup_frame_off[ FD_WKSP_RESTORE_V2_CGROUP_MAX ]; /* cgroup frame offsets observed while streaming this volume */
     769           0 :     ulong vol_cgroup_alloc_cnt[ FD_WKSP_RESTORE_V2_CGROUP_MAX ]; /* cgroup allocation counts observed while streaming this volume */
     770             : 
     771           0 :     ulong vol_appendix_frame_off[ FD_WKSP_RESTORE_V2_CGROUP_MAX ]; /* same info as read from the volume's appendix frame */
     772           0 :     ulong vol_appendix_alloc_cnt[ FD_WKSP_RESTORE_V2_CGROUP_MAX ]; /* (compared against the observed values below) */
     773             : 
     774             :     /* For all cgroups in the volume */
     775             : 
     776           0 :     for(;;) {
     777             : 
     778           0 :       ulong part_lo = ftr_alloc_cnt;
     779             : 
     780             :       /* Open the frame and read the leading command to determine if the
     781             :          frame is a cgroup, appendix (which ends the volume) or an end
     782             :          of volumes frame (which ends the checkpt).  If it is an
     783             :          appendix, validate and close the frame and proceed to the next
     784             :          volume.  If it is the end of volumes, validate and close the
     785             :          frame and proceed to footer processing.  Otherwise, proceed to
     786             :          processing a cgroup frame. */
     787             : 
     788           0 :       RESTORE_OPEN( hdr->frame_style_compressed );
     789             : 
     790           0 :       fd_wksp_checkpt_v2_cmd_t cmd[1];
     791             : 
     792           0 :       RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     793             : 
     794           0 :       if( FD_UNLIKELY( fd_wksp_checkpt_v2_cmd_is_appendix( cmd ) ) ) {
     795           0 :         RESTORE_TEST( cmd->appendix.frame_off==frame_off_prev );
     796           0 :         frame_off_prev = frame_off;
     797             : 
     798           0 :         RESTORE_DATA( vol_appendix_frame_off, vol_cgroup_cnt*sizeof(ulong) );
     799           0 :         RESTORE_DATA( vol_appendix_alloc_cnt, vol_cgroup_cnt*sizeof(ulong) );
     800           0 :         RESTORE_CLOSE();
     801             : 
     802           0 :         RESTORE_TEST( !memcmp( vol_appendix_frame_off, vol_cgroup_frame_off, vol_cgroup_cnt*sizeof(ulong) ) );
     803           0 :         RESTORE_TEST( !memcmp( vol_appendix_alloc_cnt, vol_cgroup_alloc_cnt, vol_cgroup_cnt*sizeof(ulong) ) );
     804             : 
     805           0 :         break;
     806           0 :       }
     807             : 
     808           0 :       if( FD_UNLIKELY( fd_wksp_checkpt_v2_cmd_is_volumes( cmd ) ) ) {
     809           0 :         RESTORE_TEST( cmd->volumes.frame_off==frame_off_prev );
     810           0 :         frame_off_prev = frame_off;
     811             : 
     812           0 :         RESTORE_CLOSE();
     813             : 
     814           0 :         goto restore_footer;
     815           0 :       }
     816             : 
     817             :       /* At this point, we have read the leading command of a cgroup frame.
     818             :          Restore the cgroup allocation metadata. */
     819             : 
     820           0 :       if( FD_UNLIKELY( vol_cgroup_cnt>=FD_WKSP_RESTORE_V2_CGROUP_MAX ) ) {
     821           0 :         FD_LOG_WARNING(( "increase FD_WKSP_RESTORE_V2_CGROUP_MAX" ));
     822           0 :         goto fail;
     823           0 :       }
     824             : 
     825           0 :       vol_cgroup_frame_off[ vol_cgroup_cnt ] = frame_off;
     826             : 
     827           0 :       for(;;) {
     828           0 :         if( FD_UNLIKELY( fd_wksp_checkpt_v2_cmd_is_data( cmd ) ) ) break;
     829           0 :         RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_meta( cmd ) );
     830             : 
     831           0 :         ulong tag      = cmd->meta.tag;      /* non-zero */
     832           0 :         ulong gaddr_lo = cmd->meta.gaddr_lo;
     833           0 :         ulong gaddr_hi = cmd->meta.gaddr_hi;
     834             : 
     835           0 :         RESTORE_TEST( (hdr_data_lo<=gaddr_lo) & (gaddr_lo<gaddr_hi) & (gaddr_hi<=hdr_data_hi) );
     836             :         /* Note: disjoint [gaddr_lo,gaddr_hi) tested on rebuild */
     837             : 
     838           0 :         if( FD_UNLIKELY( !((data_lo<=gaddr_lo) & (gaddr_hi<=data_hi)) ) ) {
     839           0 :           FD_LOG_WARNING(( "restore failed because checkpt allocation [0x%016lx,0x%016lx) tag %lu does not fit into the wksp "
     840           0 :                            "data region [0x%016lx,0x%016lx) (hdr_data_max %lu, wksp_data_max %lu)",
     841           0 :                            gaddr_lo, gaddr_hi, tag, data_lo, data_hi, hdr_data_max, data_max ));
     842           0 :           goto fail;
     843           0 :         }
     844             : 
     845           0 :         if( FD_UNLIKELY( ftr_alloc_cnt>=part_max ) ) {
     846           0 :           FD_LOG_WARNING(( "restore failed because there are too few wksp partitions to restore allocations into "
     847           0 :                            "(alloc_cnt %lu, hdr_part_max %lu, wksp_part_max %lu)",
     848           0 :                            ftr_alloc_cnt, hdr->part_max, wksp->part_max ));
     849           0 :           goto fail;
     850           0 :         }
     851             : 
     852           0 :         dirty = 1; /* pinfo is overwritten below, so later failures are reported as CORRUPT */
     853           0 :         pinfo[ ftr_alloc_cnt ].gaddr_lo = gaddr_lo;
     854           0 :         pinfo[ ftr_alloc_cnt ].gaddr_hi = gaddr_hi;
     855           0 :         pinfo[ ftr_alloc_cnt ].tag      = tag;
     856           0 :         ftr_alloc_cnt++;
     857             : 
     858           0 :         RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
     859           0 :       }
     860             : 
     861             :       /* At this point, we have restored all cgroup allocation metadata
     862             :          into the pinfo array at [part_lo,ftr_alloc_cnt).  Restore the
     863             :          corresponding cgroup allocation data. */
     864             : 
     865           0 :       for( ulong part_idx=part_lo; part_idx<ftr_alloc_cnt; part_idx++ ) {
     866           0 :         ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
     867           0 :         ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
     868             : 
     869           0 :         dirty = 1;
     870           0 :         RESTORE_DATA( fd_wksp_laddr_fast( wksp, gaddr_lo ), gaddr_hi - gaddr_lo );
     871           0 :       }
     872             : 
     873             :       /* Close the cgroup frame */
     874             : 
     875           0 :       RESTORE_CLOSE();
     876             : 
     877             :       /* Update verification info */
     878             : 
     879           0 :       vol_cgroup_alloc_cnt[ vol_cgroup_cnt ] = ftr_alloc_cnt - part_lo;
     880           0 :       vol_cgroup_cnt++;
     881             : 
     882           0 :     }
     883             : 
     884             :     /* Update verification info */
     885             : 
     886           0 :     ftr_cgroup_cnt += vol_cgroup_cnt;
     887           0 :     ftr_volume_cnt++;
     888           0 :   }
     889             : 
     890           0 : restore_footer:
     891             : 
     892             :   /* At this point, the checkpt is positioned at the start of the
     893             :      footer.  Restore and validate it.  Note that checkpt data has been
     894             :      fully decompressed into the wksp pinfo and data region but the wksp
     895             :      indexing structures have not been rebuilt.  Further note that
     896             :      restoring the footer is pure validation. */
     897             : 
     898           0 :   FD_LOG_INFO(( "Restoring footer" ));
     899             : 
     900           0 :   fd_wksp_checkpt_v2_ftr_t ftr[1];
     901             : 
     902           0 :   RESTORE_TEST( !fd_wksp_restore_v2_ftr( restore, hdr, ftr, frame_off + sizeof(fd_wksp_checkpt_v2_ftr_t) ) );
     903             : 
     904           0 :   RESTORE_TEST( ftr->alloc_cnt ==ftr_alloc_cnt  );
     905           0 :   RESTORE_TEST( ftr->cgroup_cnt==ftr_cgroup_cnt );
     906           0 :   RESTORE_TEST( ftr->volume_cnt==ftr_volume_cnt );
     907           0 :   RESTORE_TEST( ftr->frame_off ==frame_off_prev );
     908             : 
     909           0 :   FD_LOG_INFO(( "Rebuilding wksp" ));
     910             : 
     911             :   /* Free any remaining old allocations and rebuild the wksp with
     912             :      the freshly restored allocations */
     913             : 
     914           0 :   dirty = 1;
     915           0 :   for( ulong part_idx=ftr_alloc_cnt; part_idx<part_max; part_idx++ ) pinfo[ part_idx ].tag = 0UL;
     916             : 
     917           0 :   if( FD_UNLIKELY( fd_wksp_rebuild( wksp, new_seed ) ) ) goto fail; /* logs details */
     918             : 
     919           0 :   FD_LOG_INFO(( "Unlocking wksp" ));
     920             : 
     921           0 :   fd_wksp_private_unlock( wksp );
     922             : 
     923           0 :   return FD_WKSP_SUCCESS;
     924             : 
     925           0 : fail: /* Release resources that might be reserved */
     926             : 
     927           0 :   if( FD_LIKELY( locked ) ) fd_wksp_private_unlock( wksp );
     928             : 
     929           0 :   return fd_int_if( dirty, FD_WKSP_ERR_CORRUPT, FD_WKSP_ERR_FAIL );
     930           0 : }
     931             : 
     932             : int
     933             : fd_wksp_private_restore_v2( fd_tpool_t * tpool,
     934             :                             ulong        t0,
     935             :                             ulong        t1,
     936             :                             fd_wksp_t *  wksp,
     937             :                             char const * path,
     938          63 :                             uint         new_seed ) {
     939             :   /* Opens the checkpt at path and restores it into wksp (mmio if available, else streaming).  Returns 0 or a FD_WKSP_ERR_* code */
     940          63 :   FD_LOG_INFO(( "Restoring checkpt \"%s\" into wksp \"%s\" (seed %u)", path, wksp->name, new_seed ));
     941             : 
     942          63 :   int            fd      = -1;
     943          63 :   void const *   mmio    = NULL;
     944          63 :   ulong          mmio_sz = 0UL;
     945          63 :   fd_restore_t * restore = NULL;
     946             :   int            ret     = FD_WKSP_ERR_FAIL; /* code returned by the fail path; FAIL unless a restore helper reports otherwise */
     947          63 :   fd_restore_t   _restore[ 1 ];
     948          63 :   uchar          rbuf[ FD_RESTORE_RBUF_MIN ];
     949             : 
     950          63 :   FD_LOG_INFO(( "Opening checkpt" ));
     951             : 
     952          63 :   fd = open( path, O_RDONLY, (mode_t)0 );
     953          63 :   if( FD_UNLIKELY( fd==-1 ) ) {
     954           0 :     FD_LOG_WARNING(( "open(\"%s\",O_RDONLY,0) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
     955           0 :     goto fail;
     956           0 :   }
     957             : 
     958          63 :   int err = fd_io_mmio_init( fd, FD_IO_MMIO_MODE_READ_ONLY, &mmio, &mmio_sz );
     959          63 :   if( FD_LIKELY( !err ) ) {
     960             : 
     961          63 :     FD_LOG_INFO(( "Restoring checkpt with mmio" ));
     962             : 
     963             :     /* FIXME: consider trimming off prefix / suffix here (i.e. scan for
     964             :        MAGIC / ~MAGIC) */
     965             : 
     966          63 :     restore = fd_restore_init_mmio( _restore, mmio, mmio_sz ); /* logs details */
     967          63 :     if( FD_UNLIKELY( !restore ) ) goto fail;
     968             : 
     969          63 :     err = fd_wksp_private_restore_v2_mmio( tpool, t0, t1, wksp, restore, new_seed ); /* logs details */
     970          63 :     if( FD_UNLIKELY( err ) ) { ret = err; goto fail; } /* keep the FAIL vs CORRUPT distinction for the caller */
     971             : 
     972          63 :   } else {
     973             : 
     974           0 :     FD_LOG_INFO(( "\"%s\" does not appear to support mmio (%i-%s); restoring checkpt with streaming",
     975           0 :                   path, err, fd_io_strerror( err ) ));
     976             : 
     977             :     /* FIXME: consider trimming off prefix (i.e. scan for MAGIC) here */
     978             : 
     979           0 :     restore = fd_restore_init_stream( _restore, fd, rbuf, FD_RESTORE_RBUF_MIN ); /* logs details */
     980           0 :     if( FD_UNLIKELY( !restore ) ) goto fail;
     981             : 
     982           0 :     err = fd_wksp_private_restore_v2_stream( wksp, restore, new_seed ); /* logs details */
     983           0 :     if( FD_UNLIKELY( err ) ) { ret = err; goto fail; } /* keep the FAIL vs CORRUPT distinction for the caller */
     984             : 
     985           0 :   }
     986             : 
     987          63 :   FD_LOG_INFO(( "Closing checkpt" ));
     988             : 
     989          63 :   if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
     990           0 :     FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
     991             : 
     992          63 :   if( FD_LIKELY( mmio_sz ) ) fd_io_mmio_fini( mmio, mmio_sz );
     993             : 
     994          63 :   if( FD_UNLIKELY( close( fd ) ) )
     995           0 :     FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
     996             : 
     997          63 :   return err; /* err is zero on this path (both non-zero cases jumped to fail above) */
     998             : 
     999           0 : fail: /* Best-effort release of the restore, the mmio region and the file descriptor */
    1000             : 
    1001           0 :   if( FD_LIKELY( restore ) ) {
    1002           0 :     if( FD_UNLIKELY( fd_restore_in_frame( restore ) ) && FD_UNLIKELY( fd_restore_close( restore ) ) )
    1003           0 :       FD_LOG_WARNING(( "fd_restore_close failed; attempting to continue" ));
    1004             : 
    1005           0 :     if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
    1006           0 :       FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
    1007           0 :   }
    1008             : 
    1009           0 :   if( FD_LIKELY( mmio_sz ) ) fd_io_mmio_fini( mmio, mmio_sz );
    1010             : 
    1011           0 :   if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
    1012           0 :     FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
    1013             : 
    1014           0 :   return ret; /* FD_WKSP_ERR_FAIL for setup failures, else the helper's FAIL / CORRUPT code */
    1015          63 : }
    1016             : 
    1017             : int
    1018             : fd_wksp_private_printf_v2( int          out,
    1019             :                            char const * path,
    1020          63 :                            int          verbose ) {
    1021             :   /* Pretty prints the verbose>=1 details of the v2 checkpt at path to
                     :      file descriptor out via dprintf: the header magic, the info
                     :      fields (wallclock, app, thread, host, cpu, group, tid, user) and
                     :      frame_style_compressed and, when verbose>=2, mode, path, binfo
                     :      and uinfo.  Does nothing and returns 0 when verbose<1.  Returns
                     :      ret, the sum of the dprintf return values, or the negative
                     :      dprintf return value if a dprintf fails.
                     :      NOTE(review): failures that are not dprintf errors (open,
                     :      restore init and presumably RESTORE_TEST) jump to fail with ret
                     :      unchanged, so a non-negative value can be returned on those
                     :      paths -- confirm callers expect this. */
    1022          63 :   int ret = 0;
    1023         117 : # define TRAP(x) do { int _err = (x); if( FD_UNLIKELY( _err<0 ) ) { ret = _err; goto fail; } ret += _err; } while(0)
    1024             :   /* TRAP adds a non-negative result to ret; on a negative result it sets ret to that value and jumps to fail */
    1025          63 :   int            fd      = -1;
    1026          63 :   fd_restore_t * restore = NULL;
    1027             :   /* fd==-1 / restore==NULL tell the fail path which resources have not been acquired yet */
    1028          63 :   fd_restore_t _restore[ 1 ];
    1029          63 :   uchar        rbuf[ FD_RESTORE_RBUF_MIN ];
    1030             :   /* _restore and rbuf are the stack storage handed to fd_restore_init_stream below */
    1031             :   /* Print the header and metadata */
    1032             : 
    1033          63 :   if( verbose>=1 ) {
    1034             : 
    1035             :      /* Open the restore */
    1036             : 
    1037          63 :     fd = open( path, O_RDONLY, (mode_t)0 );
    1038          63 :     if( FD_UNLIKELY( fd==-1 ) ) {
    1039           0 :       FD_LOG_WARNING(( "open(\"%s\",O_RDONLY,0) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
    1040           0 :       goto fail;
    1041           0 :     }
    1042             : 
    1043          63 :     restore = fd_restore_init_stream( _restore, fd, rbuf, FD_RESTORE_RBUF_MIN ); /* logs details */
    1044          63 :     if( FD_UNLIKELY( !restore ) ) goto fail;
    1045             : 
    1046             :     /* Restore the header */
    1047             : 
    1048          63 :     fd_wksp_checkpt_v2_hdr_t hdr[1];
    1049             : 
    1050          63 :     RESTORE_TEST( !fd_wksp_restore_v2_hdr( restore, hdr ) );
    1051             :     /* RESTORE_TEST is defined earlier in this file; presumably it jumps to fail when its condition is false -- TODO confirm */
    1052             :     /* Restore the info */
    1053             : 
    1054          63 :     fd_wksp_checkpt_v2_info_t info[1];
    1055          63 :     char                      info_buf[ 65536 ];
    1056          63 :     char const *              info_cstr[ 9 ]; /* [0-5] app, thread, host, cpu, group, user; [6-8] path, binfo, uinfo (as printed below) */
    1057             : 
    1058          63 :     RESTORE_TEST( !fd_wksp_restore_v2_info( restore, hdr, info, info_buf, 65536UL, info_cstr ) );
    1059             : 
    1060          63 :     char info_wallclock[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
    1061          63 :     fd_log_wallclock_cstr( info->wallclock, info_wallclock );
    1062             : 
    1063             :     /* Pretty print the header and info */
    1064             : 
    1065          63 :     TRAP( dprintf( out,
    1066             :                  //"\tstyle                  %-20i\n"        /* verbose 0 info (already printed) */
    1067             :                  //"\tname                   %s\n"
    1068             :                  //"\tseed                   %-20u\n"
    1069             :                  //"\tpart_max               %-20lu\n"
    1070             :                  //"\tdata_max               %-20lu\n"
    1071          63 :                    "\tmagic                  %016lx\n"      /* verbose 1 info */
    1072          63 :                    "\twallclock              %-20li (%s)\n"
    1073          63 :                    "\tapp                    %-20lu (%s)\n"
    1074          63 :                    "\tthread                 %-20lu (%s)\n"
    1075          63 :                    "\thost                   %-20lu (%s)\n"
    1076          63 :                    "\tcpu                    %-20lu (%s)\n"
    1077          63 :                    "\tgroup                  %-20lu (%s)\n"
    1078          63 :                    "\ttid                    %-20lu\n"
    1079          63 :                    "\tuser                   %-20lu (%s)\n"
    1080          63 :                    "\tframe_style_compressed %-20i\n",      /* (v2 specific) */
    1081          63 :                    hdr->magic,
    1082          63 :                    info->wallclock, info_wallclock,
    1083          63 :                    info->app_id,    info_cstr[0],
    1084          63 :                    info->thread_id, info_cstr[1],
    1085          63 :                    info->host_id,   info_cstr[2],
    1086          63 :                    info->cpu_id,    info_cstr[3],
    1087          63 :                    info->group_id,  info_cstr[4],
    1088          63 :                    info->tid,
    1089          63 :                    info->user_id,   info_cstr[5],
    1090          63 :                    hdr->frame_style_compressed ) );
    1091             :     /* The longer strings (mode, path, binfo, uinfo) are only printed at verbose>=2 */
    1092          63 :     if( verbose>=2 )
    1093          54 :       TRAP( dprintf( out, "\tmode                   %03lo\n" /* (v2 specific) */
    1094          63 :                           "\tpath\n\t\t%s\n"                 /* (v2 specific) */
    1095          63 :                           "\tbinfo\n\t\t%s\n"
    1096          63 :                           "\tuinfo\n\t\t%s\n",
    1097          63 :                           info->mode, info_cstr[6], info_cstr[7], info_cstr[8] ) );
    1098             : 
    1099             :     /* FIXME: consider implementing handling of verbose>=3.  Since data in a
    1100             :        compressed frame can't be easily skipped over (due to sequential
    1101             :        dependencies between compressed data bufs inherently induced by
    1102             :        compression algos), we would:
    1103             : 
    1104             :        Use stat to get the size of the checkpt, seek to the end of the
    1105             :        file and restore the footer frame to get the appendix frame
    1106             :        location, seek to the appendix frame, and restore it to get the
    1107             :        cgroup frame offsets and partition counts.  Then, for each cgroup,
    1108             :        seek to the cgroup frame, init a streaming restore, open the frame,
    1109             :        restore the partition count and partition metadata (which is
    1110             :        conveniently located at the start of a cgroup frame), close it and
    1111             :        fini the restore.  Omitting for now as this isn't particularly
    1112             :        important functionality. */
    1113             : 
    1114             :     /* Finish restoring */
    1115             :     /* Teardown failures below are only logged; ret (bytes printed so far) is still returned */
    1116          63 :     if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
    1117           0 :       FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
    1118             : 
    1119          63 :     if( FD_UNLIKELY( close( fd ) ) )
    1120           0 :       FD_LOG_WARNING(( "close failed (%i-%s); attempting to continue", errno, fd_io_strerror( errno ) ));
    1121          63 :   }
    1122             : 
    1123          63 : # undef TRAP
    1124             : 
    1125          63 :   return ret;
    1126             : 
    1127           0 : fail: /* Release resources that might be reserved */
    1128             :   /* Close any open frame before fini; both steps are skipped if the restore was never initialized */
    1129           0 :   if( FD_LIKELY( restore ) ) {
    1130           0 :     if( FD_UNLIKELY( fd_restore_in_frame( restore ) ) && FD_UNLIKELY( fd_restore_close( restore ) ) )
    1131           0 :       FD_LOG_WARNING(( "fd_restore_close failed; attempting to continue" ));
    1132             : 
    1133           0 :     if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
    1134           0 :       FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
    1135           0 :   }
    1136             :   /* fd is still -1 if open itself failed, in which case there is nothing to close */
    1137           0 :   if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
    1138           0 :     FD_LOG_WARNING(( "close failed (%i-%s); attempting to continue", errno, fd_io_strerror( errno ) ));
    1139             : 
    1140           0 :   return ret; /* negative only if a dprintf failed; otherwise whatever was accumulated before the failure */
    1141          63 : }
    1142             : /* Retire the file-local RESTORE_* helper macros (RESTORE_SEEK and RESTORE_OPEN are #defined at the top of this file; the others presumably likewise) */
    1143             : #undef RESTORE_TEST
    1144             : #undef RESTORE_DATA
    1145             : #undef RESTORE_META
    1146             : #undef RESTORE_CLOSE
    1147             : #undef RESTORE_OPEN
    1148             : #undef RESTORE_SEEK

Generated by: LCOV version 1.14