Line data Source code
1 : #include "utils/fd_ssctrl.h"
2 :
3 : #include "../../disco/topo/fd_topo.h"
4 : #include "../../disco/metrics/fd_metrics.h"
5 :
6 : #include "generated/fd_snapdc_tile_seccomp.h"
7 :
8 : #define ZSTD_STATIC_LINKING_ONLY
9 : #include <zstd.h>
10 :
11 : #define NAME "snapdc"
12 :
13 0 : #define ZSTD_WINDOW_SZ (1UL<<25UL) /* 32MiB */
14 :
15 : /* The snapdc tile is a state machine that decompresses the full and
16 : optionally incremental snapshot byte stream that it receives from the
17 : snapld tile. In the event that the snapshot is already uncompressed,
18 : this tile simply copies the stream to the next tile in the pipeline. */
19 :
20 : struct fd_snapdc_tile {
21 : uint full : 1;
22 : uint is_zstd : 1;
23 : uint dirty : 1; /* in the middle of a frame? */
24 : int state;
25 :
26 : ZSTD_DCtx * zstd;
27 :
28 : struct {
29 : fd_wksp_t * mem;
30 : ulong chunk0;
31 : ulong wmark;
32 : ulong mtu;
33 : ulong frag_pos;
34 : } in;
35 :
36 : struct {
37 : fd_wksp_t * mem;
38 : ulong chunk0;
39 : ulong wmark;
40 : ulong chunk;
41 : ulong mtu;
42 : } out;
43 :
44 : struct {
45 : struct {
46 : ulong compressed_bytes_read;
47 : ulong decompressed_bytes_written;
48 : } full;
49 :
50 : struct {
51 : ulong compressed_bytes_read;
52 : ulong decompressed_bytes_written;
53 : } incremental;
54 : } metrics;
55 : };
56 : typedef struct fd_snapdc_tile fd_snapdc_tile_t;
57 :
58 : FD_FN_PURE static ulong
59 0 : scratch_align( void ) {
60 0 : return fd_ulong_max( alignof(fd_snapdc_tile_t), 32UL );
61 0 : }
62 :
63 : FD_FN_PURE static ulong
64 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
65 0 : (void)tile;
66 0 : ulong l = FD_LAYOUT_INIT;
67 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_snapdc_tile_t), sizeof(fd_snapdc_tile_t) );
68 0 : l = FD_LAYOUT_APPEND( l, 32UL, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
69 0 : return FD_LAYOUT_FINI( l, scratch_align() );
70 0 : }
71 :
72 : static inline int
73 0 : should_shutdown( fd_snapdc_tile_t * ctx ) {
74 0 : return ctx->state==FD_SNAPSHOT_STATE_SHUTDOWN;
75 0 : }
76 :
77 : static void
78 0 : metrics_write( fd_snapdc_tile_t * ctx ) {
79 0 : FD_MGAUGE_SET( SNAPDC, FULL_COMPRESSED_BYTES_READ, ctx->metrics.full.compressed_bytes_read );
80 0 : FD_MGAUGE_SET( SNAPDC, FULL_DECOMPRESSED_BYTES_WRITTEN, ctx->metrics.full.decompressed_bytes_written );
81 :
82 0 : FD_MGAUGE_SET( SNAPDC, INCREMENTAL_COMPRESSED_BYTES_READ, ctx->metrics.incremental.compressed_bytes_read );
83 0 : FD_MGAUGE_SET( SNAPDC, INCREMENTAL_DECOMPRESSED_BYTES_WRITTEN, ctx->metrics.incremental.decompressed_bytes_written );
84 :
85 0 : FD_MGAUGE_SET( SNAPDC, STATE, (ulong)(ctx->state) );
86 0 : }
87 :
88 : static void
89 : transition_malformed( fd_snapdc_tile_t * ctx,
90 0 : fd_stem_context_t * stem ) {
91 0 : if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_ERROR ) ) return;
92 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
93 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
94 0 : }
95 :
96 : static inline void
97 : handle_control_frag( fd_snapdc_tile_t * ctx,
98 : fd_stem_context_t * stem,
99 : ulong sig,
100 : ulong chunk,
101 0 : ulong sz ) {
102 0 : if( FD_UNLIKELY( sig==FD_SNAPSHOT_MSG_META ) ) return;
103 0 : if( FD_UNLIKELY( sig==FD_SNAPSHOT_MSG_LOAD_COMPLETE ) ) return;
104 :
105 : /* All control messages cause us to want to reset the decompression stream */
106 0 : ulong error = ZSTD_DCtx_reset( ctx->zstd, ZSTD_reset_session_only );
107 0 : if( FD_UNLIKELY( ZSTD_isError( error ) ) ) FD_LOG_ERR(( "ZSTD_DCtx_reset failed (%lu-%s)", error, ZSTD_getErrorName( error ) ));
108 :
109 0 : if( ctx->state==FD_SNAPSHOT_STATE_ERROR && sig!=FD_SNAPSHOT_MSG_CTRL_FAIL ) {
110 : /* Control messages move along the snapshot load pipeline. Since
111 : error conditions can be triggered by any tile in the pipeline,
112 : it is possible to be in error state and still receive otherwise
113 : valid messages. Only a fail message can revert this. */
114 0 : return;
115 0 : };
116 :
117 0 : int forward_msg = 1;
118 :
119 0 : switch( sig ) {
120 0 : case FD_SNAPSHOT_MSG_CTRL_INIT_FULL:
121 0 : case FD_SNAPSHOT_MSG_CTRL_INIT_INCR: {
122 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
123 0 : ctx->state = FD_SNAPSHOT_STATE_PROCESSING;
124 0 : FD_TEST( sz==sizeof(fd_ssctrl_init_t) );
125 0 : fd_ssctrl_init_t const * msg = fd_chunk_to_laddr_const( ctx->in.mem, chunk );
126 0 : ctx->full = sig==FD_SNAPSHOT_MSG_CTRL_INIT_FULL;
127 0 : ctx->is_zstd = !!msg->zstd;
128 0 : ctx->dirty = 0;
129 0 : ctx->in.frag_pos = 0UL;
130 0 : if( ctx->full ) {
131 0 : ctx->metrics.full.compressed_bytes_read = 0UL;
132 0 : ctx->metrics.full.decompressed_bytes_written = 0UL;
133 0 : } else {
134 0 : ctx->metrics.incremental.compressed_bytes_read = 0UL;
135 0 : ctx->metrics.incremental.decompressed_bytes_written = 0UL;
136 0 : }
137 0 : fd_ssctrl_init_t * msg_out = fd_chunk_to_laddr( ctx->out.mem, ctx->out.chunk );
138 0 : fd_memcpy( msg_out, msg, sz );
139 0 : fd_stem_publish( stem, 0UL, sig, ctx->out.chunk, sz, 0UL, 0UL, 0UL );
140 0 : ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, ctx->out.mtu, ctx->out.chunk0, ctx->out.wmark );
141 0 : forward_msg = 0; // we forward the control message in the `fd_ssctrl_init_t` message
142 0 : break;
143 0 : }
144 :
145 0 : case FD_SNAPSHOT_MSG_CTRL_FINI: {
146 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING );
147 0 : ctx->state = FD_SNAPSHOT_STATE_FINISHING;
148 0 : if( FD_UNLIKELY( ctx->is_zstd && ctx->dirty ) ) {
149 0 : FD_LOG_WARNING(( "encountered end-of-file in the middle of a compressed frame for %s snapshot",
150 0 : ctx->full ? "full" : "incremental" ));
151 0 : transition_malformed( ctx, stem );
152 0 : forward_msg = 0;
153 0 : break;
154 0 : }
155 0 : break;
156 0 : }
157 :
158 0 : case FD_SNAPSHOT_MSG_CTRL_NEXT:
159 0 : case FD_SNAPSHOT_MSG_CTRL_DONE: {
160 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_FINISHING );
161 0 : ctx->state = FD_SNAPSHOT_STATE_IDLE;
162 0 : break;
163 0 : }
164 :
165 0 : case FD_SNAPSHOT_MSG_CTRL_ERROR: {
166 0 : FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
167 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
168 0 : break;
169 0 : }
170 :
171 0 : case FD_SNAPSHOT_MSG_CTRL_FAIL: {
172 0 : FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
173 0 : ctx->state = FD_SNAPSHOT_STATE_IDLE;
174 0 : break;
175 0 : }
176 :
177 0 : case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN: {
178 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
179 0 : ctx->state = FD_SNAPSHOT_STATE_SHUTDOWN;
180 0 : break;
181 0 : }
182 :
183 0 : default: {
184 0 : FD_LOG_ERR(( "unexpected control frag %s (%lu) in state %s (%lu)",
185 0 : fd_ssctrl_msg_ctrl_str( sig ), sig,
186 0 : fd_ssctrl_state_str( (ulong)ctx->state ), (ulong)ctx->state ));
187 0 : break;
188 0 : }
189 0 : }
190 :
191 : /* Forward the control message down the pipeline */
192 0 : if( FD_LIKELY( forward_msg ) ) {
193 0 : fd_stem_publish( stem, 0UL, sig, 0UL, 0UL, 0UL, 0UL, 0UL );
194 0 : }
195 0 : }
196 :
197 : static inline int
198 : handle_data_frag( fd_snapdc_tile_t * ctx,
199 : fd_stem_context_t * stem,
200 : ulong chunk,
201 0 : ulong sz ) {
202 0 : if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_ERROR ) ) {
203 : /* Ignore all data frags after observing an error in the stream until
204 : we receive fail & init control messages to restart processing. */
205 0 : return 0;
206 0 : }
207 0 : if( FD_UNLIKELY( ctx->state!=FD_SNAPSHOT_STATE_PROCESSING ) ) {
208 0 : FD_LOG_ERR(( "received unexpected data frag in state %s (%lu)",
209 0 : fd_ssctrl_state_str( (ulong)ctx->state ), (ulong)ctx->state ));
210 0 : }
211 :
212 0 : FD_TEST( chunk>=ctx->in.chunk0 && chunk<=ctx->in.wmark && sz<=ctx->in.mtu && sz>=ctx->in.frag_pos );
213 0 : uchar const * data = fd_chunk_to_laddr_const( ctx->in.mem, chunk );
214 0 : uchar const * in = data+ctx->in.frag_pos;
215 0 : uchar * out = fd_chunk_to_laddr( ctx->out.mem, ctx->out.chunk );
216 :
217 0 : if( FD_UNLIKELY( !ctx->is_zstd ) ) {
218 0 : FD_TEST( ctx->in.frag_pos<sz );
219 0 : ulong cpy = fd_ulong_min( sz-ctx->in.frag_pos, ctx->out.mtu );
220 0 : fd_memcpy( out, in, cpy );
221 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_DATA, ctx->out.chunk, cpy, 0UL, 0UL, 0UL );
222 0 : ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, cpy, ctx->out.chunk0, ctx->out.wmark );
223 :
224 0 : if( FD_LIKELY( ctx->full ) ) {
225 0 : ctx->metrics.full.compressed_bytes_read += cpy;
226 0 : ctx->metrics.full.decompressed_bytes_written += cpy;
227 0 : } else {
228 0 : ctx->metrics.incremental.compressed_bytes_read += cpy;
229 0 : ctx->metrics.incremental.decompressed_bytes_written += cpy;
230 0 : }
231 :
232 0 : ctx->in.frag_pos += cpy;
233 0 : FD_TEST( ctx->in.frag_pos<=sz );
234 0 : if( FD_UNLIKELY( ctx->in.frag_pos<sz ) ) return 1;
235 0 : ctx->in.frag_pos = 0UL;
236 0 : return 0;
237 0 : }
238 :
239 0 : ulong in_consumed = 0UL, out_produced = 0UL;
240 0 : ulong frame_res = ZSTD_decompressStream_simpleArgs(
241 0 : ctx->zstd,
242 0 : out,
243 0 : ctx->out.mtu,
244 0 : &out_produced,
245 0 : in,
246 0 : sz-ctx->in.frag_pos,
247 0 : &in_consumed );
248 0 : if( FD_UNLIKELY( ZSTD_isError( frame_res ) ) ) {
249 0 : FD_LOG_WARNING(( "error while decompressing %s snapshot (%u-%s)",
250 0 : ctx->full ? "full" : "incremental",
251 0 : ZSTD_getErrorCode( frame_res ), ZSTD_getErrorName( frame_res ) ));
252 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
253 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
254 0 : return 0;
255 0 : }
256 :
257 0 : if( FD_LIKELY( out_produced ) ) {
258 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_DATA, ctx->out.chunk, out_produced, 0UL, 0UL, 0UL );
259 0 : ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, out_produced, ctx->out.chunk0, ctx->out.wmark );
260 0 : }
261 :
262 0 : ctx->in.frag_pos += in_consumed;
263 0 : FD_TEST( ctx->in.frag_pos<=sz );
264 :
265 0 : if( FD_LIKELY( ctx->full ) ) {
266 0 : ctx->metrics.full.compressed_bytes_read += in_consumed;
267 0 : ctx->metrics.full.decompressed_bytes_written += out_produced;
268 0 : } else {
269 0 : ctx->metrics.incremental.compressed_bytes_read += in_consumed;
270 0 : ctx->metrics.incremental.decompressed_bytes_written += out_produced;
271 0 : }
272 :
273 0 : ctx->dirty = frame_res!=0UL;
274 :
275 0 : int maybe_more_output = out_produced==ctx->out.mtu || ctx->in.frag_pos<sz;
276 0 : if( FD_LIKELY( !maybe_more_output ) ) ctx->in.frag_pos = 0UL;
277 0 : return maybe_more_output;
278 0 : }
279 :
280 : static inline int
281 : returnable_frag( fd_snapdc_tile_t * ctx,
282 : ulong in_idx FD_PARAM_UNUSED,
283 : ulong seq FD_PARAM_UNUSED,
284 : ulong sig,
285 : ulong chunk,
286 : ulong sz,
287 : ulong ctl FD_PARAM_UNUSED,
288 : ulong tsorig FD_PARAM_UNUSED,
289 : ulong tspub FD_PARAM_UNUSED,
290 0 : fd_stem_context_t * stem ) {
291 0 : FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
292 :
293 0 : if( FD_LIKELY( sig==FD_SNAPSHOT_MSG_DATA ) ) return handle_data_frag( ctx, stem, chunk, sz );
294 0 : else handle_control_frag( ctx, stem, sig, chunk, sz );
295 :
296 0 : return 0;
297 0 : }
298 :
299 : static ulong
300 : populate_allowed_fds( fd_topo_t const * topo FD_PARAM_UNUSED,
301 : fd_topo_tile_t const * tile FD_PARAM_UNUSED,
302 : ulong out_fds_cnt,
303 0 : int * out_fds ) {
304 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
305 :
306 0 : ulong out_cnt = 0;
307 0 : out_fds[ out_cnt++ ] = 2UL; /* stderr */
308 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) ) {
309 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
310 0 : }
311 :
312 0 : return out_cnt;
313 0 : }
314 :
315 : static ulong
316 : populate_allowed_seccomp( fd_topo_t const * topo FD_PARAM_UNUSED,
317 : fd_topo_tile_t const * tile FD_PARAM_UNUSED,
318 : ulong out_cnt,
319 0 : struct sock_filter * out ) {
320 0 : populate_sock_filter_policy_fd_snapdc_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
321 0 : return sock_filter_policy_fd_snapdc_tile_instr_cnt;
322 0 : }
323 :
324 : static void
325 : unprivileged_init( fd_topo_t const * topo,
326 0 : fd_topo_tile_t const * tile ) {
327 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
328 :
329 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
330 0 : fd_snapdc_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_snapdc_tile_t), sizeof(fd_snapdc_tile_t) );
331 0 : void * _zstd = FD_SCRATCH_ALLOC_APPEND( l, 32UL, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
332 :
333 0 : ctx->state = FD_SNAPSHOT_STATE_IDLE;
334 :
335 0 : ctx->zstd = ZSTD_initStaticDStream( _zstd, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
336 0 : FD_TEST( ctx->zstd );
337 0 : FD_TEST( ctx->zstd==_zstd );
338 :
339 0 : ctx->dirty = 0;
340 0 : ctx->in.frag_pos = 0UL;
341 0 : fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
342 :
343 0 : if( FD_UNLIKELY( tile->in_cnt !=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu ins, expected 1", tile->in_cnt ));
344 0 : if( FD_UNLIKELY( tile->out_cnt!=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu outs, expected 1", tile->out_cnt ));
345 :
346 0 : fd_topo_link_t const * snapin_link = &topo->links[ tile->out_link_id[ 0UL ] ];
347 0 : FD_TEST( 0==strcmp( snapin_link->name, "snapdc_in" ) );
348 0 : ctx->out.mem = topo->workspaces[ topo->objs[ snapin_link->dcache_obj_id ].wksp_id ].wksp;
349 0 : ctx->out.chunk0 = fd_dcache_compact_chunk0( ctx->out.mem, snapin_link->dcache );
350 0 : ctx->out.wmark = fd_dcache_compact_wmark ( ctx->out.mem, snapin_link->dcache, snapin_link->mtu );
351 0 : ctx->out.chunk = ctx->out.chunk0;
352 0 : ctx->out.mtu = snapin_link->mtu;
353 :
354 0 : fd_topo_link_t const * in_link = &topo->links[ tile->in_link_id[ 0UL ] ];
355 0 : fd_topo_wksp_t const * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ];
356 0 : ctx->in.mem = in_wksp->wksp;
357 0 : ctx->in.chunk0 = fd_dcache_compact_chunk0( ctx->in.mem, in_link->dcache );
358 0 : ctx->in.wmark = fd_dcache_compact_wmark( ctx->in.mem, in_link->dcache, in_link->mtu );
359 0 : ctx->in.mtu = in_link->mtu;
360 :
361 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
362 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
363 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu",
364 0 : scratch_top - (ulong)scratch - scratch_footprint( tile ),
365 0 : scratch_top,
366 0 : (ulong)scratch + scratch_footprint( tile ) ));
367 0 : }
368 :
/* Configuration for the stem run loop template included below.

   STEM_BURST: bound on the number of frags one callback invocation may
   publish.  In this file handle_data_frag publishes either one data
   frag or one error frag, and handle_control_frag publishes at most
   one frag, so 2 leaves headroom. */
#define STEM_BURST 2UL

#define STEM_LAZY (128L*3000L)

/* Context type handed to every callback, and its alignment */
#define STEM_CALLBACK_CONTEXT_TYPE  fd_snapdc_tile_t
#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_snapdc_tile_t)

/* Callbacks implemented by this tile */
#define STEM_CALLBACK_SHOULD_SHUTDOWN should_shutdown
#define STEM_CALLBACK_METRICS_WRITE   metrics_write
#define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
380 :
381 : #include "../../disco/stem/fd_stem.c"
382 :
383 : fd_topo_run_tile_t fd_tile_snapdc = {
384 : .name = NAME,
385 : .populate_allowed_fds = populate_allowed_fds,
386 : .populate_allowed_seccomp = populate_allowed_seccomp,
387 : .scratch_align = scratch_align,
388 : .scratch_footprint = scratch_footprint,
389 : .unprivileged_init = unprivileged_init,
390 : .run = stem_run,
391 : };
392 :
393 : #undef NAME
|