LCOV - code coverage report
Current view: top level - discof/restore - fd_snapdc_tile.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 229 0.0 %
Date: 2026-06-29 05:51:35 Functions: 0 11 0.0 %

          Line data    Source code
       1             : #include "utils/fd_ssctrl.h"
       2             : 
       3             : #include "../../disco/topo/fd_topo.h"
       4             : #include "../../disco/metrics/fd_metrics.h"
       5             : 
       6             : #include "generated/fd_snapdc_tile_seccomp.h"
       7             : 
       8             : #define ZSTD_STATIC_LINKING_ONLY
       9             : #include <zstd.h>
      10             : 
      11             : #define NAME "snapdc"
      12             : 
      13           0 : #define ZSTD_WINDOW_SZ (1UL<<25UL) /* 32MiB */
      14             : 
      15             : /* The snapdc tile is a state machine that decompresses the full and
      16             :    optionally incremental snapshot byte stream that it receives from the
      17             :    snapld tile.  In the event that the snapshot is already uncompressed,
      18             :    this tile simply copies the stream to the next tile in the pipeline. */
      19             : 
      20             : struct fd_snapdc_tile {
      21             :   uint full    : 1;
      22             :   uint is_zstd : 1;
      23             :   uint dirty   : 1;  /* in the middle of a frame? */
      24             :   int state;
      25             : 
      26             :   ZSTD_DCtx * zstd;
      27             : 
      28             :   struct {
      29             :     fd_wksp_t * mem;
      30             :     ulong       chunk0;
      31             :     ulong       wmark;
      32             :     ulong       mtu;
      33             :     ulong       frag_pos;
      34             :   } in;
      35             : 
      36             :   struct {
      37             :     fd_wksp_t * mem;
      38             :     ulong       chunk0;
      39             :     ulong       wmark;
      40             :     ulong       chunk;
      41             :     ulong       mtu;
      42             :   } out;
      43             : 
      44             :   struct {
      45             :     struct {
      46             :       ulong compressed_bytes_read;
      47             :       ulong decompressed_bytes_written;
      48             :     } full;
      49             : 
      50             :     struct {
      51             :       ulong compressed_bytes_read;
      52             :       ulong decompressed_bytes_written;
      53             :     } incremental;
      54             :   } metrics;
      55             : };
      56             : typedef struct fd_snapdc_tile fd_snapdc_tile_t;
      57             : 
      58             : FD_FN_PURE static ulong
      59           0 : scratch_align( void ) {
      60           0 :   return fd_ulong_max( alignof(fd_snapdc_tile_t), 32UL );
      61           0 : }
      62             : 
      63             : FD_FN_PURE static ulong
      64           0 : scratch_footprint( fd_topo_tile_t const * tile ) {
      65           0 :   (void)tile;
      66           0 :   ulong l = FD_LAYOUT_INIT;
      67           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_snapdc_tile_t), sizeof(fd_snapdc_tile_t)                   );
      68           0 :   l = FD_LAYOUT_APPEND( l, 32UL,                      ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
      69           0 :   return FD_LAYOUT_FINI( l, scratch_align() );
      70           0 : }
      71             : 
      72             : static inline int
      73           0 : should_shutdown( fd_snapdc_tile_t * ctx ) {
      74           0 :   return ctx->state==FD_SNAPSHOT_STATE_SHUTDOWN;
      75           0 : }
      76             : 
      77             : static void
      78           0 : metrics_write( fd_snapdc_tile_t * ctx ) {
      79           0 :   FD_MGAUGE_SET( SNAPDC, FULL_COMPRESSED_BYTES_READ,              ctx->metrics.full.compressed_bytes_read );
      80           0 :   FD_MGAUGE_SET( SNAPDC, FULL_DECOMPRESSED_BYTES_WRITTEN,         ctx->metrics.full.decompressed_bytes_written );
      81             : 
      82           0 :   FD_MGAUGE_SET( SNAPDC, INCREMENTAL_COMPRESSED_BYTES_READ,       ctx->metrics.incremental.compressed_bytes_read );
      83           0 :   FD_MGAUGE_SET( SNAPDC, INCREMENTAL_DECOMPRESSED_BYTES_WRITTEN,  ctx->metrics.incremental.decompressed_bytes_written );
      84             : 
      85           0 :   FD_MGAUGE_SET( SNAPDC, STATE,                                   (ulong)(ctx->state) );
      86           0 : }
      87             : 
      88             : static void
      89             : transition_malformed( fd_snapdc_tile_t *  ctx,
      90           0 :                       fd_stem_context_t * stem ) {
      91           0 :   if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_ERROR ) ) return;
      92           0 :   ctx->state = FD_SNAPSHOT_STATE_ERROR;
      93           0 :   fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
      94           0 : }
      95             : 
      96             : static inline void
      97             : handle_control_frag( fd_snapdc_tile_t *  ctx,
      98             :                      fd_stem_context_t * stem,
      99             :                      ulong               sig,
     100             :                      ulong               chunk,
     101           0 :                      ulong               sz ) {
     102           0 :   if( FD_UNLIKELY( sig==FD_SNAPSHOT_MSG_META ) ) return;
     103           0 :   if( FD_UNLIKELY( sig==FD_SNAPSHOT_MSG_LOAD_COMPLETE ) ) return;
     104             : 
     105             :   /* All control messages cause us to want to reset the decompression stream */
     106           0 :   ulong error = ZSTD_DCtx_reset( ctx->zstd, ZSTD_reset_session_only );
     107           0 :   if( FD_UNLIKELY( ZSTD_isError( error ) ) ) FD_LOG_ERR(( "ZSTD_DCtx_reset failed (%lu-%s)", error, ZSTD_getErrorName( error ) ));
     108             : 
     109           0 :   if( ctx->state==FD_SNAPSHOT_STATE_ERROR && sig!=FD_SNAPSHOT_MSG_CTRL_FAIL ) {
     110             :     /* Control messages move along the snapshot load pipeline.  Since
     111             :        error conditions can be triggered by any tile in the pipeline,
     112             :        it is possible to be in error state and still receive otherwise
     113             :        valid messages.  Only a fail message can revert this. */
     114           0 :     return;
     115           0 :   };
     116             : 
     117           0 :   int forward_msg = 1;
     118             : 
     119           0 :   switch( sig ) {
     120           0 :     case FD_SNAPSHOT_MSG_CTRL_INIT_FULL:
     121           0 :     case FD_SNAPSHOT_MSG_CTRL_INIT_INCR: {
     122           0 :       FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
     123           0 :       ctx->state = FD_SNAPSHOT_STATE_PROCESSING;
     124           0 :       FD_TEST( sz==sizeof(fd_ssctrl_init_t) );
     125           0 :       fd_ssctrl_init_t const * msg = fd_chunk_to_laddr_const( ctx->in.mem, chunk );
     126           0 :       ctx->full = sig==FD_SNAPSHOT_MSG_CTRL_INIT_FULL;
     127           0 :       ctx->is_zstd = !!msg->zstd;
     128           0 :       ctx->dirty = 0;
     129           0 :       ctx->in.frag_pos = 0UL;
     130           0 :       if( ctx->full ) {
     131           0 :         ctx->metrics.full.compressed_bytes_read      = 0UL;
     132           0 :         ctx->metrics.full.decompressed_bytes_written = 0UL;
     133           0 :       } else {
     134           0 :         ctx->metrics.incremental.compressed_bytes_read      = 0UL;
     135           0 :         ctx->metrics.incremental.decompressed_bytes_written = 0UL;
     136           0 :       }
     137           0 :       fd_ssctrl_init_t * msg_out = fd_chunk_to_laddr( ctx->out.mem, ctx->out.chunk );
     138           0 :       fd_memcpy( msg_out, msg, sz );
     139           0 :       fd_stem_publish( stem, 0UL, sig, ctx->out.chunk, sz, 0UL, 0UL, 0UL );
     140           0 :       ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, ctx->out.mtu, ctx->out.chunk0, ctx->out.wmark );
     141           0 :       forward_msg = 0; // we forward the control message in the `fd_ssctrl_init_t` message
     142           0 :       break;
     143           0 :     }
     144             : 
     145           0 :     case FD_SNAPSHOT_MSG_CTRL_FINI: {
     146           0 :       FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING );
     147           0 :       ctx->state = FD_SNAPSHOT_STATE_FINISHING;
     148           0 :       if( FD_UNLIKELY( ctx->is_zstd && ctx->dirty ) ) {
     149           0 :         FD_LOG_WARNING(( "encountered end-of-file in the middle of a compressed frame for %s snapshot",
     150           0 :                          ctx->full ? "full" : "incremental" ));
     151           0 :         transition_malformed( ctx, stem );
     152           0 :         forward_msg = 0;
     153           0 :         break;
     154           0 :       }
     155           0 :       break;
     156           0 :     }
     157             : 
     158           0 :     case FD_SNAPSHOT_MSG_CTRL_NEXT:
     159           0 :     case FD_SNAPSHOT_MSG_CTRL_DONE: {
     160           0 :       FD_TEST( ctx->state==FD_SNAPSHOT_STATE_FINISHING );
     161           0 :       ctx->state = FD_SNAPSHOT_STATE_IDLE;
     162           0 :       break;
     163           0 :     }
     164             : 
     165           0 :     case FD_SNAPSHOT_MSG_CTRL_ERROR: {
     166           0 :       FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
     167           0 :       ctx->state = FD_SNAPSHOT_STATE_ERROR;
     168           0 :       break;
     169           0 :     }
     170             : 
     171           0 :     case FD_SNAPSHOT_MSG_CTRL_FAIL: {
     172           0 :       FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
     173           0 :       ctx->state = FD_SNAPSHOT_STATE_IDLE;
     174           0 :       break;
     175           0 :     }
     176             : 
     177           0 :     case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN: {
     178           0 :       FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
     179           0 :       ctx->state = FD_SNAPSHOT_STATE_SHUTDOWN;
     180           0 :       break;
     181           0 :     }
     182             : 
     183           0 :     default: {
     184           0 :       FD_LOG_ERR(( "unexpected control frag %s (%lu) in state %s (%lu)",
     185           0 :                    fd_ssctrl_msg_ctrl_str( sig ), sig,
     186           0 :                    fd_ssctrl_state_str( (ulong)ctx->state ), (ulong)ctx->state ));
     187           0 :       break;
     188           0 :     }
     189           0 :   }
     190             : 
     191             :   /* Forward the control message down the pipeline */
     192           0 :   if( FD_LIKELY( forward_msg ) ) {
     193           0 :     fd_stem_publish( stem, 0UL, sig, 0UL, 0UL, 0UL, 0UL, 0UL );
     194           0 :   }
     195           0 : }
     196             : 
     197             : static inline int
     198             : handle_data_frag( fd_snapdc_tile_t *  ctx,
     199             :                   fd_stem_context_t * stem,
     200             :                   ulong               chunk,
     201           0 :                   ulong               sz ) {
     202           0 :   if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_ERROR ) ) {
     203             :     /* Ignore all data frags after observing an error in the stream until
     204             :        we receive fail & init control messages to restart processing. */
     205           0 :     return 0;
     206           0 :   }
     207           0 :   if( FD_UNLIKELY( ctx->state!=FD_SNAPSHOT_STATE_PROCESSING ) ) {
     208           0 :     FD_LOG_ERR(( "received unexpected data frag in state %s (%lu)",
     209           0 :                  fd_ssctrl_state_str( (ulong)ctx->state ), (ulong)ctx->state ));
     210           0 :   }
     211             : 
     212           0 :   FD_TEST( chunk>=ctx->in.chunk0 && chunk<=ctx->in.wmark && sz<=ctx->in.mtu && sz>=ctx->in.frag_pos );
     213           0 :   uchar const * data = fd_chunk_to_laddr_const( ctx->in.mem, chunk );
     214           0 :   uchar const * in  = data+ctx->in.frag_pos;
     215           0 :   uchar * out = fd_chunk_to_laddr( ctx->out.mem, ctx->out.chunk );
     216             : 
     217           0 :   if( FD_UNLIKELY( !ctx->is_zstd ) ) {
     218           0 :     FD_TEST( ctx->in.frag_pos<sz );
     219           0 :     ulong cpy = fd_ulong_min( sz-ctx->in.frag_pos, ctx->out.mtu );
     220           0 :     fd_memcpy( out, in, cpy );
     221           0 :     fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_DATA, ctx->out.chunk, cpy, 0UL, 0UL, 0UL );
     222           0 :     ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, cpy, ctx->out.chunk0, ctx->out.wmark );
     223             : 
     224           0 :     if( FD_LIKELY( ctx->full ) ) {
     225           0 :       ctx->metrics.full.compressed_bytes_read      += cpy;
     226           0 :       ctx->metrics.full.decompressed_bytes_written += cpy;
     227           0 :     } else {
     228           0 :       ctx->metrics.incremental.compressed_bytes_read      += cpy;
     229           0 :       ctx->metrics.incremental.decompressed_bytes_written += cpy;
     230           0 :     }
     231             : 
     232           0 :     ctx->in.frag_pos += cpy;
     233           0 :     FD_TEST( ctx->in.frag_pos<=sz );
     234           0 :     if( FD_UNLIKELY( ctx->in.frag_pos<sz ) ) return 1;
     235           0 :     ctx->in.frag_pos = 0UL;
     236           0 :     return 0;
     237           0 :   }
     238             : 
     239           0 :   ulong in_consumed = 0UL, out_produced = 0UL;
     240           0 :   ulong frame_res = ZSTD_decompressStream_simpleArgs(
     241           0 :       ctx->zstd,
     242           0 :       out,
     243           0 :       ctx->out.mtu,
     244           0 :       &out_produced,
     245           0 :       in,
     246           0 :       sz-ctx->in.frag_pos,
     247           0 :       &in_consumed );
     248           0 :   if( FD_UNLIKELY( ZSTD_isError( frame_res ) ) ) {
     249           0 :     FD_LOG_WARNING(( "error while decompressing %s snapshot (%u-%s)",
     250           0 :                      ctx->full ? "full" : "incremental",
     251           0 :                      ZSTD_getErrorCode( frame_res ), ZSTD_getErrorName( frame_res ) ));
     252           0 :     ctx->state = FD_SNAPSHOT_STATE_ERROR;
     253           0 :     fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
     254           0 :     return 0;
     255           0 :   }
     256             : 
     257           0 :   if( FD_LIKELY( out_produced ) ) {
     258           0 :     fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_DATA, ctx->out.chunk, out_produced, 0UL, 0UL, 0UL );
     259           0 :     ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, out_produced, ctx->out.chunk0, ctx->out.wmark );
     260           0 :   }
     261             : 
     262           0 :   ctx->in.frag_pos += in_consumed;
     263           0 :   FD_TEST( ctx->in.frag_pos<=sz );
     264             : 
     265           0 :   if( FD_LIKELY( ctx->full ) ) {
     266           0 :     ctx->metrics.full.compressed_bytes_read      += in_consumed;
     267           0 :     ctx->metrics.full.decompressed_bytes_written += out_produced;
     268           0 :   } else {
     269           0 :     ctx->metrics.incremental.compressed_bytes_read      += in_consumed;
     270           0 :     ctx->metrics.incremental.decompressed_bytes_written += out_produced;
     271           0 :   }
     272             : 
     273           0 :   ctx->dirty = frame_res!=0UL;
     274             : 
     275           0 :   int maybe_more_output = out_produced==ctx->out.mtu || ctx->in.frag_pos<sz;
     276           0 :   if( FD_LIKELY( !maybe_more_output ) ) ctx->in.frag_pos = 0UL;
     277           0 :   return maybe_more_output;
     278           0 : }
     279             : 
     280             : static inline int
     281             : returnable_frag( fd_snapdc_tile_t *  ctx,
     282             :                  ulong               in_idx FD_PARAM_UNUSED,
     283             :                  ulong               seq    FD_PARAM_UNUSED,
     284             :                  ulong               sig,
     285             :                  ulong               chunk,
     286             :                  ulong               sz,
     287             :                  ulong               ctl    FD_PARAM_UNUSED,
     288             :                  ulong               tsorig FD_PARAM_UNUSED,
     289             :                  ulong               tspub  FD_PARAM_UNUSED,
     290           0 :                  fd_stem_context_t * stem ) {
     291           0 :   FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
     292             : 
     293           0 :   if( FD_LIKELY( sig==FD_SNAPSHOT_MSG_DATA ) ) return handle_data_frag( ctx, stem, chunk, sz );
     294           0 :   else                                                handle_control_frag( ctx, stem, sig, chunk, sz );
     295             : 
     296           0 :   return 0;
     297           0 : }
     298             : 
     299             : static ulong
     300             : populate_allowed_fds( fd_topo_t      const * topo FD_PARAM_UNUSED,
     301             :                       fd_topo_tile_t const * tile FD_PARAM_UNUSED,
     302             :                       ulong                  out_fds_cnt,
     303           0 :                       int *                  out_fds ) {
     304           0 :   if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
     305             : 
     306           0 :   ulong out_cnt = 0;
     307           0 :   out_fds[ out_cnt++ ] = 2UL; /* stderr */
     308           0 :   if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) ) {
     309           0 :     out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
     310           0 :   }
     311             : 
     312           0 :   return out_cnt;
     313           0 : }
     314             : 
     315             : static ulong
     316             : populate_allowed_seccomp( fd_topo_t const *      topo FD_PARAM_UNUSED,
     317             :                           fd_topo_tile_t const * tile FD_PARAM_UNUSED,
     318             :                           ulong                  out_cnt,
     319           0 :                           struct sock_filter *   out ) {
     320           0 :   populate_sock_filter_policy_fd_snapdc_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
     321           0 :   return sock_filter_policy_fd_snapdc_tile_instr_cnt;
     322           0 : }
     323             : 
     324             : static void
     325             : unprivileged_init( fd_topo_t const *      topo,
     326           0 :                    fd_topo_tile_t const * tile ) {
     327           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
     328             : 
     329           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     330           0 :   fd_snapdc_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_snapdc_tile_t), sizeof(fd_snapdc_tile_t) );
     331           0 :   void * _zstd           = FD_SCRATCH_ALLOC_APPEND( l, 32UL,                      ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
     332             : 
     333           0 :   ctx->state = FD_SNAPSHOT_STATE_IDLE;
     334             : 
     335           0 :   ctx->zstd = ZSTD_initStaticDStream( _zstd, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
     336           0 :   FD_TEST( ctx->zstd );
     337           0 :   FD_TEST( ctx->zstd==_zstd );
     338             : 
     339           0 :   ctx->dirty = 0;
     340           0 :   ctx->in.frag_pos = 0UL;
     341           0 :   fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
     342             : 
     343           0 :   if( FD_UNLIKELY( tile->in_cnt !=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu ins, expected 1",  tile->in_cnt  ));
     344           0 :   if( FD_UNLIKELY( tile->out_cnt!=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu outs, expected 1", tile->out_cnt ));
     345             : 
     346           0 :   fd_topo_link_t const * snapin_link = &topo->links[ tile->out_link_id[ 0UL ] ];
     347           0 :   FD_TEST( 0==strcmp( snapin_link->name, "snapdc_in" ) );
     348           0 :   ctx->out.mem    = topo->workspaces[ topo->objs[ snapin_link->dcache_obj_id ].wksp_id ].wksp;
     349           0 :   ctx->out.chunk0 = fd_dcache_compact_chunk0( ctx->out.mem, snapin_link->dcache );
     350           0 :   ctx->out.wmark  = fd_dcache_compact_wmark ( ctx->out.mem, snapin_link->dcache, snapin_link->mtu );
     351           0 :   ctx->out.chunk  = ctx->out.chunk0;
     352           0 :   ctx->out.mtu    = snapin_link->mtu;
     353             : 
     354           0 :   fd_topo_link_t const * in_link = &topo->links[ tile->in_link_id[ 0UL ] ];
     355           0 :   fd_topo_wksp_t const * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ];
     356           0 :   ctx->in.mem                    = in_wksp->wksp;
     357           0 :   ctx->in.chunk0                 = fd_dcache_compact_chunk0( ctx->in.mem, in_link->dcache );
     358           0 :   ctx->in.wmark                  = fd_dcache_compact_wmark( ctx->in.mem, in_link->dcache, in_link->mtu );
     359           0 :   ctx->in.mtu                    = in_link->mtu;
     360             : 
     361           0 :   ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
     362           0 :   if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
     363           0 :     FD_LOG_ERR(( "scratch overflow %lu %lu %lu",
     364           0 :                  scratch_top - (ulong)scratch - scratch_footprint( tile ),
     365           0 :                  scratch_top,
     366           0 :                  (ulong)scratch + scratch_footprint( tile ) ));
     367           0 : }
     368             : 
     369             : /* handle_data_frag can publish one data frag plus an error frag */
     370           0 : #define STEM_BURST 2UL
     371             : 
     372           0 : #define STEM_LAZY  (128L*3000L)
     373             : 
     374           0 : #define STEM_CALLBACK_CONTEXT_TYPE  fd_snapdc_tile_t
     375           0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_snapdc_tile_t)
     376             : 
     377             : #define STEM_CALLBACK_SHOULD_SHUTDOWN should_shutdown
     378           0 : #define STEM_CALLBACK_METRICS_WRITE   metrics_write
     379           0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
     380             : 
     381             : #include "../../disco/stem/fd_stem.c"
     382             : 
     383             : fd_topo_run_tile_t fd_tile_snapdc = {
     384             :   .name                     = NAME,
     385             :   .populate_allowed_fds     = populate_allowed_fds,
     386             :   .populate_allowed_seccomp = populate_allowed_seccomp,
     387             :   .scratch_align            = scratch_align,
     388             :   .scratch_footprint        = scratch_footprint,
     389             :   .unprivileged_init        = unprivileged_init,
     390             :   .run                      = stem_run,
     391             : };
     392             : 
     393             : #undef NAME

Generated by: LCOV version 1.14