Line data Source code
1 : #include "utils/fd_ssctrl.h"
2 :
3 : #include "../../disco/topo/fd_topo.h"
4 : #include "../../disco/metrics/fd_metrics.h"
5 :
6 : #include "generated/fd_snapdc_tile_seccomp.h"
7 :
8 : #define ZSTD_STATIC_LINKING_ONLY
9 : #include <zstd.h>
10 :
11 : #define NAME "snapdc"
12 :
13 0 : #define ZSTD_WINDOW_SZ (1UL<<25UL) /* 32MiB */
14 :
15 : /* The snapdc tile is a state machine that decompresses the full and
16 : optionally incremental snapshot byte stream that it receives from the
17 : snapld tile. */
18 :
19 : struct fd_snapdc_tile {
20 : int full;
21 : int state;
22 :
23 : ZSTD_DCtx * zstd;
24 :
25 : struct {
26 : fd_wksp_t * wksp;
27 : ulong chunk0;
28 : ulong wmark;
29 : ulong mtu;
30 : ulong frag_pos;
31 : } in;
32 :
33 : struct {
34 : fd_wksp_t * wksp;
35 : ulong chunk0;
36 : ulong wmark;
37 : ulong chunk;
38 : ulong mtu;
39 : } out;
40 :
41 : struct {
42 : struct {
43 : ulong compressed_bytes_read;
44 : ulong decompressed_bytes_read;
45 : } full;
46 :
47 : struct {
48 : ulong compressed_bytes_read;
49 : ulong decompressed_bytes_read;
50 : } incremental;
51 : } metrics;
52 : };
53 : typedef struct fd_snapdc_tile fd_snapdc_tile_t;
54 :
55 : FD_FN_PURE static ulong
56 0 : scratch_align( void ) {
57 0 : return fd_ulong_max( alignof(fd_snapdc_tile_t), 32UL );
58 0 : }
59 :
60 : FD_FN_PURE static ulong
61 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
62 0 : (void)tile;
63 0 : ulong l = FD_LAYOUT_INIT;
64 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_snapdc_tile_t), sizeof(fd_snapdc_tile_t) );
65 0 : l = FD_LAYOUT_APPEND( l, 32UL, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
66 0 : return FD_LAYOUT_FINI( l, scratch_align() );
67 0 : }
68 :
69 : static inline int
70 0 : should_shutdown( fd_snapdc_tile_t * ctx ) {
71 0 : return ctx->state==FD_SNAPSHOT_STATE_SHUTDOWN;
72 0 : }
73 :
74 : static void
75 0 : metrics_write( fd_snapdc_tile_t * ctx ) {
76 0 : FD_MGAUGE_SET( SNAPDC, FULL_COMPRESSED_BYTES_READ, ctx->metrics.full.compressed_bytes_read );
77 0 : FD_MGAUGE_SET( SNAPDC, FULL_DECOMPRESSED_BYTES_READ, ctx->metrics.full.decompressed_bytes_read );
78 :
79 0 : FD_MGAUGE_SET( SNAPDC, INCREMENTAL_COMPRESSED_BYTES_READ, ctx->metrics.incremental.compressed_bytes_read );
80 0 : FD_MGAUGE_SET( SNAPDC, INCREMENTAL_DECOMPRESSED_BYTES_READ, ctx->metrics.incremental.decompressed_bytes_read );
81 :
82 0 : FD_MGAUGE_SET( SNAPDC, STATE, (ulong)(ctx->state) );
83 0 : }
84 :
85 : static inline void
86 : handle_control_frag( fd_snapdc_tile_t * ctx,
87 : fd_stem_context_t * stem,
88 0 : ulong sig ) {
89 0 : if( FD_UNLIKELY( sig==FD_SNAPSHOT_MSG_META ) ) return;
90 :
91 : /* All control messages cause us to want to reset the decompression stream */
92 0 : ulong error = ZSTD_DCtx_reset( ctx->zstd, ZSTD_reset_session_only );
93 0 : if( FD_UNLIKELY( ZSTD_isError( error ) ) ) FD_LOG_ERR(( "ZSTD_DCtx_reset failed (%lu-%s)", error, ZSTD_getErrorName( error ) ));
94 :
95 0 : switch( sig ) {
96 0 : case FD_SNAPSHOT_MSG_CTRL_INIT_FULL:
97 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
98 0 : ctx->state = FD_SNAPSHOT_STATE_PROCESSING;
99 0 : ctx->full = 1;
100 0 : ctx->in.frag_pos = 0UL;
101 0 : ctx->metrics.full.compressed_bytes_read = 0UL;
102 0 : ctx->metrics.full.decompressed_bytes_read = 0UL;
103 0 : break;
104 0 : case FD_SNAPSHOT_MSG_CTRL_INIT_INCR:
105 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
106 0 : ctx->state = FD_SNAPSHOT_STATE_PROCESSING;
107 0 : ctx->full = 0;
108 0 : ctx->in.frag_pos = 0UL;
109 0 : ctx->metrics.incremental.compressed_bytes_read = 0UL;
110 0 : ctx->metrics.incremental.decompressed_bytes_read = 0UL;
111 0 : break;
112 0 : case FD_SNAPSHOT_MSG_CTRL_FAIL:
113 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING ||
114 0 : ctx->state==FD_SNAPSHOT_STATE_FINISHING ||
115 0 : ctx->state==FD_SNAPSHOT_STATE_ERROR );
116 0 : ctx->state = FD_SNAPSHOT_STATE_IDLE;
117 0 : break;
118 0 : case FD_SNAPSHOT_MSG_CTRL_NEXT:
119 0 : case FD_SNAPSHOT_MSG_CTRL_DONE:
120 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_PROCESSING ||
121 0 : ctx->state==FD_SNAPSHOT_STATE_FINISHING ||
122 0 : ctx->state==FD_SNAPSHOT_STATE_ERROR );
123 0 : if( FD_UNLIKELY( ctx->state!=FD_SNAPSHOT_STATE_FINISHING ) ) {
124 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
125 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
126 0 : return;
127 0 : }
128 0 : ctx->state = FD_SNAPSHOT_STATE_IDLE;
129 0 : break;
130 0 : case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN:
131 0 : FD_TEST( ctx->state==FD_SNAPSHOT_STATE_IDLE );
132 0 : ctx->state = FD_SNAPSHOT_STATE_SHUTDOWN;
133 0 : metrics_write( ctx ); /* ensures that shutdown state is written to metrics workspace before the tile actually shuts down */
134 0 : break;
135 0 : case FD_SNAPSHOT_MSG_CTRL_ERROR:
136 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
137 0 : break;
138 0 : default:
139 0 : FD_LOG_ERR(( "unexpected control sig %lu", sig ));
140 0 : return;
141 0 : }
142 :
143 : /* Forward the control message down the pipeline */
144 0 : fd_stem_publish( stem, 0UL, sig, 0UL, 0UL, 0UL, 0UL, 0UL );
145 0 : }
146 :
147 : static inline int
148 : handle_data_frag( fd_snapdc_tile_t * ctx,
149 : fd_stem_context_t * stem,
150 : ulong chunk,
151 0 : ulong sz ) {
152 0 : if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_FINISHING ) ) {
153 : /* We thought the snapshot was finished (we already read the full
154 : frame) and then we got another data fragment from the reader.
155 : This means the snapshot has extra padding or garbage on the end,
156 : which we don't trust so just abandon it completely. */
157 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
158 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
159 0 : return 0;
160 0 : }
161 0 : else if( FD_UNLIKELY( ctx->state==FD_SNAPSHOT_STATE_ERROR ) ) {
162 : /* Ignore all data frags after observing an error in the stream until
163 : we receive fail & init control messages to restart processing. */
164 0 : return 0;
165 0 : }
166 0 : else if( FD_UNLIKELY( ctx->state!=FD_SNAPSHOT_STATE_PROCESSING ) ) {
167 0 : FD_LOG_ERR(( "invalid state for data frag %d", ctx->state ));
168 0 : }
169 :
170 0 : FD_TEST( chunk>=ctx->in.chunk0 && chunk<=ctx->in.wmark && sz<=ctx->in.mtu && sz>=ctx->in.frag_pos );
171 0 : uchar const * data = fd_chunk_to_laddr_const( ctx->in.wksp, chunk );
172 0 : uchar const * in = data+ctx->in.frag_pos;
173 0 : uchar * out = fd_chunk_to_laddr( ctx->out.wksp, ctx->out.chunk );
174 0 : ulong in_consumed = 0UL, out_produced = 0UL;
175 0 : ulong error = ZSTD_decompressStream_simpleArgs( ctx->zstd,
176 0 : out,
177 0 : ctx->out.mtu,
178 0 : &out_produced,
179 0 : in,
180 0 : sz-ctx->in.frag_pos,
181 0 : &in_consumed );
182 0 : if( FD_UNLIKELY( ZSTD_isError( error ) ) ) {
183 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
184 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
185 0 : return 0;
186 0 : }
187 :
188 0 : if( FD_LIKELY( out_produced ) ) {
189 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_DATA, ctx->out.chunk, out_produced, 0UL, 0UL, 0UL );
190 0 : ctx->out.chunk = fd_dcache_compact_next( ctx->out.chunk, out_produced, ctx->out.chunk0, ctx->out.wmark );
191 0 : }
192 :
193 0 : ctx->in.frag_pos += in_consumed;
194 0 : FD_TEST( ctx->in.frag_pos<=sz );
195 :
196 0 : if( FD_LIKELY( ctx->full ) ) {
197 0 : ctx->metrics.full.compressed_bytes_read += in_consumed;
198 0 : ctx->metrics.full.decompressed_bytes_read += out_produced;
199 0 : } else {
200 0 : ctx->metrics.incremental.compressed_bytes_read += in_consumed;
201 0 : ctx->metrics.incremental.decompressed_bytes_read += out_produced;
202 0 : }
203 :
204 0 : if( FD_UNLIKELY( !error ) ) {
205 0 : if( FD_UNLIKELY( ctx->in.frag_pos!=sz ) ) {
206 : /* Zstandard finished decoding the snapshot frame (the whole
207 : snapshot is a single frame), but, the fragment we got from
208 : the snapshot reader has not been fully consumed, so there is
209 : some trailing padding or garbage at the end of the snapshot.
210 :
211 : This is not valid under the snapshot format and indicates a
212 : problem so we abandon the snapshot. */
213 0 : ctx->state = FD_SNAPSHOT_STATE_ERROR;
214 0 : fd_stem_publish( stem, 0UL, FD_SNAPSHOT_MSG_CTRL_ERROR, 0UL, 0UL, 0UL, 0UL, 0UL );
215 0 : return 0;
216 0 : }
217 :
218 0 : ctx->state = FD_SNAPSHOT_STATE_FINISHING;
219 0 : }
220 :
221 0 : int maybe_more_output = out_produced==ctx->out.mtu || ctx->in.frag_pos<sz;
222 0 : if( FD_LIKELY( !maybe_more_output ) ) ctx->in.frag_pos = 0UL;
223 0 : return maybe_more_output;
224 0 : }
225 :
226 : static inline int
227 : returnable_frag( fd_snapdc_tile_t * ctx,
228 : ulong in_idx FD_PARAM_UNUSED,
229 : ulong seq FD_PARAM_UNUSED,
230 : ulong sig,
231 : ulong chunk,
232 : ulong sz,
233 : ulong ctl FD_PARAM_UNUSED,
234 : ulong tsorig FD_PARAM_UNUSED,
235 : ulong tspub FD_PARAM_UNUSED,
236 0 : fd_stem_context_t * stem ) {
237 0 : FD_TEST( ctx->state!=FD_SNAPSHOT_STATE_SHUTDOWN );
238 :
239 0 : if( FD_LIKELY( sig==FD_SNAPSHOT_MSG_DATA ) ) return handle_data_frag( ctx, stem, chunk, sz );
240 0 : else handle_control_frag( ctx, stem, sig );
241 :
242 0 : return 0;
243 0 : }
244 :
245 : static ulong
246 : populate_allowed_fds( fd_topo_t const * topo FD_PARAM_UNUSED,
247 : fd_topo_tile_t const * tile FD_PARAM_UNUSED,
248 : ulong out_fds_cnt,
249 0 : int * out_fds ) {
250 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
251 :
252 0 : ulong out_cnt = 0;
253 0 : out_fds[ out_cnt++ ] = 2UL; /* stderr */
254 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) ) {
255 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
256 0 : }
257 :
258 0 : return out_cnt;
259 0 : }
260 :
261 : static ulong
262 : populate_allowed_seccomp( fd_topo_t const * topo FD_PARAM_UNUSED,
263 : fd_topo_tile_t const * tile FD_PARAM_UNUSED,
264 : ulong out_cnt,
265 0 : struct sock_filter * out ) {
266 0 : populate_sock_filter_policy_fd_snapdc_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
267 0 : return sock_filter_policy_fd_snapdc_tile_instr_cnt;
268 0 : }
269 :
270 : static void
271 : unprivileged_init( fd_topo_t * topo,
272 0 : fd_topo_tile_t * tile ) {
273 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
274 :
275 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
276 0 : fd_snapdc_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_snapdc_tile_t), sizeof(fd_snapdc_tile_t) );
277 0 : void * _zstd = FD_SCRATCH_ALLOC_APPEND( l, 32UL, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
278 :
279 0 : ctx->state = FD_SNAPSHOT_STATE_IDLE;
280 :
281 0 : ctx->zstd = ZSTD_initStaticDStream( _zstd, ZSTD_estimateDStreamSize( ZSTD_WINDOW_SZ ) );
282 0 : FD_TEST( ctx->zstd );
283 0 : FD_TEST( ctx->zstd==_zstd );
284 :
285 0 : ctx->in.frag_pos = 0UL;
286 0 : fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
287 :
288 0 : if( FD_UNLIKELY( tile->in_cnt !=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu ins, expected 1", tile->in_cnt ));
289 0 : if( FD_UNLIKELY( tile->out_cnt!=1UL ) ) FD_LOG_ERR(( "tile `" NAME "` has %lu outs, expected 1", tile->out_cnt ));
290 :
291 0 : fd_topo_link_t * snapin_link = &topo->links[ tile->out_link_id[ 0UL ] ];
292 0 : ctx->out.wksp = topo->workspaces[ topo->objs[ snapin_link->dcache_obj_id ].wksp_id ].wksp;
293 0 : ctx->out.chunk0 = fd_dcache_compact_chunk0( ctx->out.wksp, snapin_link->dcache );
294 0 : ctx->out.wmark = fd_dcache_compact_wmark ( ctx->out.wksp, snapin_link->dcache, snapin_link->mtu );
295 0 : ctx->out.chunk = ctx->out.chunk0;
296 0 : ctx->out.mtu = snapin_link->mtu;
297 :
298 0 : fd_topo_link_t const * in_link = &topo->links[ tile->in_link_id[ 0UL ] ];
299 0 : fd_topo_wksp_t const * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ];
300 0 : ctx->in.wksp = in_wksp->wksp;;
301 0 : ctx->in.chunk0 = fd_dcache_compact_chunk0( ctx->in.wksp, in_link->dcache );
302 0 : ctx->in.wmark = fd_dcache_compact_wmark( ctx->in.wksp, in_link->dcache, in_link->mtu );
303 0 : ctx->in.mtu = in_link->mtu;
304 :
305 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
306 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
307 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu",
308 0 : scratch_top - (ulong)scratch - scratch_footprint( tile ),
309 0 : scratch_top,
310 0 : (ulong)scratch + scratch_footprint( tile ) ));
311 0 : }
312 :
313 : /* handle_data_frag can publish one data frag plus an error frag */
314 0 : #define STEM_BURST 2UL
315 :
316 0 : #define STEM_LAZY 1000L
317 :
318 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_snapdc_tile_t
319 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_snapdc_tile_t)
320 :
321 : #define STEM_CALLBACK_SHOULD_SHUTDOWN should_shutdown
322 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
323 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
324 :
325 : #include "../../disco/stem/fd_stem.c"
326 :
327 : fd_topo_run_tile_t fd_tile_snapdc = {
328 : .name = NAME,
329 : .populate_allowed_fds = populate_allowed_fds,
330 : .populate_allowed_seccomp = populate_allowed_seccomp,
331 : .scratch_align = scratch_align,
332 : .scratch_footprint = scratch_footprint,
333 : .unprivileged_init = unprivileged_init,
334 : .run = stem_run,
335 : };
336 :
337 : #undef NAME
|