Line data Source code
1 : #include "fd_wksp_private.h"
2 :
3 : #include <stdio.h>
4 : #include <errno.h>
5 : #include <unistd.h>
6 : #include <fcntl.h>
7 : #include <sys/stat.h>
8 :
/* FD_WKSP_RESTORE_V2_CGROUP_MAX bounds how many cgroup frames per
   volume this implementation can track (sizes the on-stack offset /
   count arrays used below).  This is an implementation detail and not
   strictly part of the v2 specification. */

#define FD_WKSP_RESTORE_V2_CGROUP_MAX (1024UL)
13 :
/* RESTORE_SEEK repositions restore to byte offset off within the
   checkpt.  Note: restore not in frame on entry, restore at off on
   exit.  Jumps to the enclosing scope's fail label on error (logs
   details). */

#define RESTORE_SEEK(off) do {                                                          \
    ulong _off = (off);                                                                 \
    if( FD_UNLIKELY( fd_restore_seek( restore, _off ) ) ) goto fail; /* logs details */ \
  } while(0)
21 :
/* RESTORE_OPEN opens a frame of the given frame_style.  Note: restore
   not in frame and at start of frame on entry, restore in frame on
   exit.  On success, frame_off (declared in the enclosing scope) holds
   the offset of the frame start.  Jumps to fail on error (logs
   details). */

#define RESTORE_OPEN(frame_style) do {                                                                                \
    if( FD_UNLIKELY( fd_restore_open_advanced( restore, (frame_style), &frame_off ) ) ) goto fail; /* logs details */ \
  } while(0)
28 :
/* RESTORE_CLOSE closes the currently open frame.  Note: restore in
   frame on entry, restore just after frame on exit.  Assumes frame
   fully processed.  On success, frame_off (enclosing scope) holds the
   offset just past the frame end.  Jumps to fail on error (logs
   details). */

#define RESTORE_CLOSE() do {                                                                            \
    if( FD_UNLIKELY( fd_restore_close_advanced( restore, &frame_off ) ) ) goto fail; /* logs details */ \
  } while(0)
36 :
/* RESTORE_META restores sz bytes of frame metadata into meta.  Note:
   restore in frame at meta and sz must be at most FD_RESTORE_META_MAX
   on entry, restore in frame at just past meta with meta ready on
   exit.  Jumps to fail on error (logs details). */

#define RESTORE_META( meta, sz ) do {                                        \
    ulong _sz  = (sz);                                                       \
    int   _err = fd_restore_meta( restore, (meta), _sz ); /* logs details */ \
    if( FD_UNLIKELY( _err ) ) {                                              \
      FD_LOG_WARNING(( "fd_restore_meta( %s, %lu ) failed (%i-%s)",          \
                       #meta, _sz, _err, fd_checkpt_strerror( _err ) ));     \
      goto fail;                                                             \
    }                                                                        \
  } while(0)
50 :
/* RESTORE_DATA restores sz bytes of frame data into data.  Note:
   restore in frame at data on entry, restore in frame just past data
   on exit, data potentially not ready until frame close and should
   exist untouched until then (logs details). */

#define RESTORE_DATA( data, sz ) do {                                        \
    ulong _sz  = (sz);                                                       \
    int   _err = fd_restore_data( restore, (data), _sz ); /* logs details */ \
    if( FD_UNLIKELY( _err ) ) {                                              \
      FD_LOG_WARNING(( "fd_restore_data( %s, %lu ) failed (%i-%s)",          \
                       #data, _sz, _err, fd_checkpt_strerror( _err ) ));     \
      goto fail;                                                             \
    }                                                                        \
  } while(0)
64 :
/* RESTORE_TEST validates restored data: jumps to the enclosing scope's
   fail label if c is not true (logs details, including the stringified
   failing condition). */

#define RESTORE_TEST( c ) do {                                \
    if( FD_UNLIKELY( !(c) ) ) {                               \
      FD_LOG_WARNING(( "restore test %s failed", #c ));       \
      goto fail;                                              \
    }                                                         \
  } while(0)
73 :
74 : /* fd_wksp_restore_v2_hdr restores the header frame from a wksp checkpt.
75 : Assumes restore is valid and at the frame start and hdr is valid. On
76 : success, returns SUCCESS, *hdr will be populated with a valid data
77 : and restore will be just after the frame end. On failure, returns
78 : FAIL, *hdr is clobbered and the caller should not assume anything
79 : about the restore state. */
80 :
static int
fd_wksp_restore_v2_hdr( fd_restore_t *             restore,
                        fd_wksp_checkpt_v2_hdr_t * hdr ) {
  ulong frame_off; /* written by RESTORE_OPEN / RESTORE_CLOSE */

  /* The header frame is stored raw so it can be parsed before the
     checkpt's compressed frame style is known. */

  RESTORE_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
  RESTORE_DATA( hdr, sizeof(fd_wksp_checkpt_v2_hdr_t) );
  RESTORE_CLOSE();

  ulong name_len = fd_shmem_name_len( hdr->name ); /* 0 indicates a malformed name */
  /* FIXME: CHECK TRAILING 0 OF NAME? */

  /* Validate the restored header fields */

  RESTORE_TEST( hdr->magic==FD_WKSP_MAGIC );
  RESTORE_TEST( hdr->style==FD_WKSP_CHECKPT_STYLE_V2 );
  RESTORE_TEST( fd_checkpt_frame_style_is_supported( hdr->frame_style_compressed ) );
  RESTORE_TEST( hdr->reserved==0U );
  RESTORE_TEST( name_len>0UL );
  /* ignore seed (arbitrary) */
  RESTORE_TEST( fd_wksp_footprint( hdr->part_max, hdr->data_max )>0UL ); /* part_max / data_max describe a valid wksp */

  return FD_WKSP_SUCCESS;

fail:
  return FD_WKSP_ERR_FAIL;
}
106 :
107 : /* fd_wksp_restore_v2_info restores the info frame from a wksp checkpt.
108 : Assumes restore is valid and at the frame start, hdr has info from
109 : the corresponding header, info_buf has room for buf_max bytes and
110 : info_cstr is valid. On success, returns SUCCESS, *info will be
111 : populated with a valid data, info_cstr will be populated with
112 : pointers into info_buf to valid info cstr (indexed in the same order
113 : as the info fields) and restore will be just after the frame end. On
failure, returns FAIL, info, info_buf and info_cstr might be clobbered and
115 : the restore state is unknown. */
116 :
static int
fd_wksp_restore_v2_info( fd_restore_t *                   restore,
                         fd_wksp_checkpt_v2_hdr_t const * hdr,
                         fd_wksp_checkpt_v2_info_t *      info,
                         char *                           info_buf,
                         ulong                            info_buf_max,
                         char const *                     info_cstr[ 9 ] ) {
  ulong frame_off; /* written by RESTORE_OPEN / RESTORE_CLOSE */

  /* The info frame holds the fixed-size info struct (as metadata)
     followed by 9 concatenated NUL-terminated cstrs (as data), using
     the compressed frame style declared in the header. */

  RESTORE_OPEN( hdr->frame_style_compressed );
  RESTORE_META( info, sizeof(fd_wksp_checkpt_v2_info_t) );
  ulong info_buf_sz = info->sz_app      /* each sz_* includes the cstr's trailing NUL */
                    + info->sz_thread
                    + info->sz_host
                    + info->sz_cpu
                    + info->sz_group
                    + info->sz_user
                    + info->sz_path
                    + info->sz_binfo
                    + info->sz_uinfo;
  RESTORE_TEST( info_buf_sz<=info_buf_max ); /* check before restoring into caller's buffer */
  RESTORE_DATA( info_buf, info_buf_sz );
  RESTORE_CLOSE();

  /* Walk the packed cstrs, validating each is non-empty, within its
     field-specific bound and exactly sz-1 chars before the NUL.  NEXT
     yields the current cstr and advances p past it. */

  char const * p = info_buf;

# define NEXT( sz, max ) (__extension__({                   \
    char const * _cstr = p;                                 \
    ulong        _sz   = (sz);                              \
    ulong        _max  = (max);                             \
    RESTORE_TEST( (0UL<_sz) & (_sz<=_max) );                \
    RESTORE_TEST( fd_cstr_nlen( _cstr, _max )==(_sz-1UL) ); \
    p += _sz;                                               \
    _cstr;                                                  \
  }))

  info_cstr[0] = NEXT( info->sz_app,    FD_LOG_NAME_MAX              );
  info_cstr[1] = NEXT( info->sz_thread, FD_LOG_NAME_MAX              );
  info_cstr[2] = NEXT( info->sz_host,   FD_LOG_NAME_MAX              );
  info_cstr[3] = NEXT( info->sz_cpu,    FD_LOG_NAME_MAX              );
  info_cstr[4] = NEXT( info->sz_group,  FD_LOG_NAME_MAX              );
  info_cstr[5] = NEXT( info->sz_user,   FD_LOG_NAME_MAX              );
  info_cstr[6] = NEXT( info->sz_path,   PATH_MAX                     );
  info_cstr[7] = NEXT( info->sz_binfo,  FD_WKSP_CHECKPT_V2_BINFO_MAX );
  info_cstr[8] = NEXT( info->sz_uinfo,  FD_WKSP_CHECKPT_V2_UINFO_MAX );

# undef NEXT

  return FD_WKSP_SUCCESS;

fail:
  return FD_WKSP_ERR_FAIL;
}
170 :
171 : /* fd_wksp_restore_v2_ftr restores the footer frame from a wksp checkpt.
172 : Assumes restore is valid and at the frame start, hdr has info from
173 : the corresponding hdr and ftr is valid. On success, returns SUCCESS,
174 : *ftr will be populated with a valid data and restore will be just
175 : after the frame end. On failure, returns FAIL, *ftr is clobbered and
176 : the caller should not assume anything about the restore state.
177 :
178 : IMPORTANT SAFETY TIP! This only validates the ftr and hdr are
179 : compatible. It is up to the caller to validate alloc_cnt,
180 : cgroup_cnt, volume_cnt, and frame_off as those may not have been
181 : known when hdr was written and ftr is restored. */
182 :
static int
fd_wksp_restore_v2_ftr( fd_restore_t *                   restore,
                        fd_wksp_checkpt_v2_hdr_t const * hdr,
                        fd_wksp_checkpt_v2_ftr_t *       ftr,
                        ulong                            checkpt_sz ) {
  ulong frame_off; /* written by RESTORE_OPEN / RESTORE_CLOSE */

  /* Like the header, the footer frame is stored raw. */

  RESTORE_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
  RESTORE_DATA( ftr, sizeof(fd_wksp_checkpt_v2_ftr_t) );
  RESTORE_CLOSE();

  /* The footer must be the last thing in the checkpt and must record
     the checkpt's actual size. */

  RESTORE_TEST( frame_off      ==checkpt_sz );
  RESTORE_TEST( ftr->checkpt_sz==checkpt_sz );

  /* The footer echoes the header fields (in reverse byte order on
     disk); verify they match the header we already restored. */

  RESTORE_TEST( ftr->data_max               ==hdr->data_max               );
  RESTORE_TEST( ftr->part_max               ==hdr->part_max               );
  RESTORE_TEST( ftr->seed                   ==hdr->seed                   );
  RESTORE_TEST( !memcmp( ftr->name, hdr->name, FD_SHMEM_NAME_MAX )        );
  RESTORE_TEST( ftr->reserved               ==hdr->reserved               );
  RESTORE_TEST( ftr->frame_style_compressed ==hdr->frame_style_compressed );
  RESTORE_TEST( ftr->style                  ==hdr->style                  );
  RESTORE_TEST( ftr->unmagic                ==~hdr->magic                 ); /* bitwise complement of header magic */

  return FD_WKSP_SUCCESS;

fail:
  return FD_WKSP_ERR_FAIL;
}
211 :
212 : /* fd_wksp_private_restore_v2_common does the common parts of a
213 : streaming and a parallel wksp restore (restores the header and info
214 : frames and pretty prints them to the log). Assumes wksp and restore
215 : are valid and restore is on the first header byte. On success,
216 : returns SUCCESS and the restore will have processed the header and
217 : info frames and will be positioned just after the info frame. On
failure, returns FAIL and restore and hdr will be in an indeterminate
219 : state. */
220 :
static int
fd_wksp_private_restore_v2_common( fd_wksp_checkpt_v2_hdr_t * hdr,
                                   fd_restore_t *             restore ) {

  FD_LOG_INFO(( "Restoring header and info (v2 frames 0:1)" ));

  RESTORE_TEST( !fd_wksp_restore_v2_hdr( restore, hdr ) );

  fd_wksp_checkpt_v2_info_t info[1];
  char                      info_buf[ 65536 ]; /* generous bound for the 9 packed info cstrs */
  char const *              info_cstr[9];

  RESTORE_TEST( !fd_wksp_restore_v2_info( restore, hdr, info, info_buf, 65536UL, info_cstr ) );

  /* Note: this mirrors printf below */

  char info_wallclock[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
  fd_log_wallclock_cstr( info->wallclock, info_wallclock );

  FD_LOG_INFO(( "\n"
                "\tstyle %-20i\n" /* verbose 0 info */
                "\tname %s\n"
                "\tseed %-20u\n"
                "\tpart_max %-20lu\n"
                "\tdata_max %-20lu\n"
                "\tmagic %016lx\n" /* verbose 1 info */
                "\twallclock %-20li (%s)\n"
                "\tapp %-20lu (%s)\n"
                "\tthread %-20lu (%s)\n"
                "\thost %-20lu (%s)\n"
                "\tcpu %-20lu (%s)\n"
                "\tgroup %-20lu (%s)\n"
                "\ttid %-20lu\n"
                "\tuser %-20lu (%s)\n"
                "\tframe_style_compressed %-20i\n" /* (v2 specific) */
                "\tmode %03lo", /* (v2 specific) */
                hdr->style, hdr->name, hdr->seed, hdr->part_max, hdr->data_max,
                hdr->magic, info->wallclock, info_wallclock,
                info->app_id,    info_cstr[0],
                info->thread_id, info_cstr[1],
                info->host_id,   info_cstr[2],
                info->cpu_id,    info_cstr[3],
                info->group_id,  info_cstr[4],
                info->tid,
                info->user_id,   info_cstr[5],
                hdr->frame_style_compressed,
                info->mode ));

  /* The below info cstr are potentially long enough to be truncated by
     the logger.  So we break them into separate log messages to log as
     much detail as possible. */

  FD_LOG_INFO(( "path\n\t\t%s",  info_cstr[6] )); /* verbose 2 info (v2 specific) */
  FD_LOG_INFO(( "binfo\n\t\t%s", info_cstr[7] )); /* verbose 2 info */
  FD_LOG_INFO(( "uinfo\n\t\t%s", info_cstr[8] )); /* verbose 2 info */

  return FD_WKSP_SUCCESS;

fail:
  return FD_WKSP_ERR_FAIL;
}
282 :
283 : /* fd_wksp_private_restore_v2_cgroup restores a cgroup's allocation into
284 : wksp. hdr contains the corresponding restore header info, frame_off
285 : is where the cgroup frame to restore is located and partitions
286 : [part_lo,part_hi) are the wksp partition indices to use for this
287 : frame's allocations. Assumes all inputs have already been validated.
288 : Returns SUCCESS (0) on success and FAIL (negative) on failure. On
289 : return, in both cases, *_dirty will be 1/0 if wksp was/was not
modified.  On error, the restore state is indeterminate. */
291 :
static int
fd_wksp_private_restore_v2_cgroup( fd_wksp_t *                      wksp,
                                   fd_restore_t *                   restore,
                                   fd_wksp_checkpt_v2_hdr_t const * hdr,
                                   ulong                            frame_off_lo,
                                   ulong                            frame_off_hi,
                                   ulong                            part_lo,
                                   ulong                            part_hi,
                                   int *                            _dirty ) {
  int dirty = 0; /* set as soon as any pinfo or data region byte is written */

  fd_wksp_private_pinfo_t * pinfo   = fd_wksp_private_pinfo( wksp );
  ulong                     data_lo = wksp->gaddr_lo; /* current wksp data region bounds */
  ulong                     data_hi = wksp->gaddr_hi;

  /* Checkpointed gaddrs are relative to the checkpt wksp's layout,
     derived from its part_max / data_max. */

  ulong hdr_data_lo = fd_wksp_private_data_off( hdr->part_max );
  ulong hdr_data_hi = hdr_data_lo + hdr->data_max;

  ulong frame_off; /* written by RESTORE_OPEN / RESTORE_CLOSE */
  RESTORE_SEEK( frame_off_lo );
  RESTORE_OPEN( hdr->frame_style_compressed );

  /* For all cgroup allocation metadata (one meta cmd per partition,
     in the same order the data will appear below) */

  fd_wksp_checkpt_v2_cmd_t cmd[1];

  for( ulong part_idx=part_lo; part_idx<part_hi; part_idx++ ) {

    RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
    RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_meta( cmd ) );

    ulong tag      = cmd->meta.tag; /* non-zero */
    ulong gaddr_lo = cmd->meta.gaddr_lo;
    ulong gaddr_hi = cmd->meta.gaddr_hi;

    /* The allocation must be a non-empty range inside the checkpt
       wksp's data region ... */

    RESTORE_TEST( (hdr_data_lo<=gaddr_lo) & (gaddr_lo<gaddr_hi) & (gaddr_hi<=hdr_data_hi) );
    /* Note: disjoint [gaddr_lo,gaddr_hi) tested on rebuild */

    /* ... and must also fit in the (possibly differently sized)
       current wksp's data region. */

    if( FD_UNLIKELY( !((data_lo<=gaddr_lo) & (gaddr_hi<=data_hi)) ) ) {
      FD_LOG_WARNING(( "restore failed because checkpt partition [0x%016lx,0x%016lx) tag %lu does not fit into current "
                       "wksp data region [0x%016lx,0x%016lx) (data_max checkpt %lu, wksp %lu)",
                       gaddr_lo, gaddr_hi, tag, data_lo, data_hi, hdr->data_max, wksp->data_max ));
      goto fail;
    }

    dirty = 1;
    pinfo[ part_idx ].gaddr_lo = gaddr_lo;
    pinfo[ part_idx ].gaddr_hi = gaddr_hi;
    pinfo[ part_idx ].tag      = tag;
  }

  /* Restore the data command (separates the metadata from the data) */

  RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
  RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_data( cmd ) );

  /* For all cgroup allocation data (same partition order as above) */

  for( ulong part_idx=part_lo; part_idx<part_hi; part_idx++ ) {
    ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
    ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;

    /* Restore the allocation into the wksp data region */

    dirty = 1;
    RESTORE_DATA( fd_wksp_laddr_fast( wksp, gaddr_lo ), gaddr_hi - gaddr_lo );
  }

  /* Close the frame */

  RESTORE_CLOSE();

  RESTORE_TEST( (frame_off_lo<frame_off) & (frame_off<=frame_off_hi) ); /* == hi if compactly stored */

  *_dirty = dirty;
  return FD_WKSP_SUCCESS;

fail:
  *_dirty = dirty; /* report partial modification even on failure */
  return FD_WKSP_ERR_FAIL;
}
373 :
374 : /* fd_wksp_private_restore_v2_node dispatches cgroup restore work to
375 : tpool threads [t0,t1). If any errors were encountered while
376 : restoring cgroups, returns the first error encountered on the lowest
377 : indexed thread in the int location pointed to by _err. If any
378 : modifications were made to wksp (whether or not there were errors),
379 : the int location pointed to by _dirty will be set to 1. Assumes
380 : caller is thread t0 and threads (t0,t1) are available. Note that we
381 : could do this with FD_MAP_REDUCE but FD_MAP_REDUCE assumes that
382 : fd_scratch space is available and we can't guarantee that here.
383 : Likewise, we could use tpool_exec_all with a TASKQ model but
384 : reduction of results is less efficient. */
385 :
386 : static void
387 : fd_wksp_private_restore_v2_node( void * tpool,
388 : ulong tpool_t0,
389 : ulong tpool_t1, /* Assumes t1>t0 */
390 : void * _wksp,
391 : void * _restore,
392 : ulong _hdr,
393 : ulong _cgroup_frame_off,
394 : ulong _cgroup_pinfo_lo,
395 : ulong _cgroup_nxt,
396 : ulong cgroup_cnt,
397 : ulong _err,
398 63 : ulong _dirty ) {
399 :
400 : /* This node is responsible for threads [t0,t1). If this range has
401 : more than one thread, split the range into left and right halves,
402 : have the first right half thread handle the right half, use this
403 : thread to handle the left half and then reduce the results from
404 : the two halves. */
405 :
406 63 : ulong tpool_cnt = tpool_t1 - tpool_t0;
407 63 : if( tpool_cnt>1UL ) {
408 0 : ulong tpool_ts = tpool_t0 + fd_tpool_private_split( tpool_cnt );
409 :
410 0 : int err0; int dirty0;
411 0 : int err1; int dirty1;
412 :
413 0 : fd_tpool_exec( tpool, tpool_ts, fd_wksp_private_restore_v2_node,
414 0 : tpool, tpool_ts, tpool_t1, _wksp, _restore, _hdr, _cgroup_frame_off, _cgroup_pinfo_lo, _cgroup_nxt, cgroup_cnt,
415 0 : (ulong)&err1, (ulong)&dirty1 );
416 0 : fd_wksp_private_restore_v2_node(
417 0 : tpool, tpool_t0, tpool_ts, _wksp, _restore, _hdr, _cgroup_frame_off, _cgroup_pinfo_lo, _cgroup_nxt, cgroup_cnt,
418 0 : (ulong)&err0, (ulong)&dirty0 );
419 0 : fd_tpool_wait( tpool, tpool_ts );
420 :
421 0 : *(int *)_err = fd_int_if( !!err0, err0, err1 ); /* Return first error encountered */
422 0 : *(int *)_dirty = dirty0 | dirty1; /* Accumulate the dirty flag */
423 0 : return;
424 0 : }
425 :
426 : /* This node is responsible for a single thread. Unpack the input
427 : arguments. */
428 :
429 63 : fd_wksp_t * wksp = (fd_wksp_t *) _wksp;
430 63 : fd_restore_t * restore = (fd_restore_t *) _restore; /* FIXME: CLONE RESTORE */
431 63 : fd_wksp_checkpt_v2_hdr_t const * hdr = (fd_wksp_checkpt_v2_hdr_t *)_hdr;
432 63 : ulong const * cgroup_frame_off = (ulong *) _cgroup_frame_off;
433 63 : ulong const * cgroup_pinfo_lo = (ulong *) _cgroup_pinfo_lo;
434 :
435 63 : int err = FD_WKSP_SUCCESS;
436 63 : int dirty = 0;
437 :
438 : /* Since we can't have multiple threads operate concurrently on the
439 : same restore object, make a new restore object safe for use by this
440 : thread (technically could use restore directly on original thread
441 : t0). */
442 :
443 63 : fd_restore_t _restore_local[1];
444 63 : fd_restore_t * restore_local =
445 63 : fd_restore_init_mmio( _restore_local, fd_restore_mmio( restore ), fd_restore_mmio_sz( restore ) ); /* logs details */
446 63 : if( FD_UNLIKELY( !restore_local ) ) {
447 0 : err = FD_WKSP_ERR_FAIL;
448 0 : goto done;
449 0 : }
450 :
451 84 : for(;;) {
452 :
453 : /* Get the next cgroup to restore. Use a dynamic task queue model
454 : here because we assume that restore a single cgroups requires a
455 : large amount of work and the amount of work is highly variable.
456 : Note that using an atomic increment for the cgroup_nxt counter
457 : assumes:
458 :
459 : cgroup_cnt << ULONG_MAX - TILE_MAX.
460 :
461 : We could use a slower atomic CAS based version instead if we want
462 : to insure that cgroup_nxt is never incremented beyond cgroup_cnt.
463 : We could also use a block partitioning or CUDA style striping if
464 : wanting to do a deterministic distribution but these might not
465 : load balance as well in various extreme circumstances. */
466 :
467 84 : # if FD_HAS_ATOMIC
468 84 : FD_COMPILER_MFENCE();
469 84 : ulong cgroup_idx = FD_ATOMIC_FETCH_AND_ADD( (ulong *)_cgroup_nxt, 1UL );
470 84 : FD_COMPILER_MFENCE();
471 : # else /* Note: this assumes platforms without HAS_ATOMIC will not be running this multithreaded */
472 : ulong cgroup_idx = (*(ulong *)_cgroup_nxt) + 1UL;
473 : # endif
474 :
475 84 : if( FD_UNLIKELY( cgroup_idx>=cgroup_cnt ) ) break; /* No more cgroups to process */
476 :
477 : /* Restore this cgroup */
478 :
479 21 : int dirty_cgroup;
480 21 : err = fd_wksp_private_restore_v2_cgroup( wksp, restore_local, hdr,
481 21 : cgroup_frame_off[ cgroup_idx ], cgroup_frame_off[ cgroup_idx+1UL ],
482 21 : cgroup_pinfo_lo [ cgroup_idx ], cgroup_pinfo_lo [ cgroup_idx+1UL ],
483 21 : &dirty_cgroup ); /* logs details */
484 21 : dirty |= dirty_cgroup;
485 21 : if( FD_UNLIKELY( err ) ) break; /* abort if we encountered an error */
486 :
487 21 : }
488 :
489 63 : fd_restore_fini( restore_local );
490 :
491 63 : done:
492 63 : *(int *)_err = err;
493 63 : *(int *)_dirty = dirty;
494 63 : }
495 :
496 : /* fd_wksp_private_restore_v2_mmio replaces all the allocations in a
497 : wksp with the allocations in the restore. Assumes all inputs have
498 : are valid, restore is positioned on the first byte of the header, has
499 : the given size and is seekable. Returns SUCCESS on success and the
restore will be positioned just after the footer.  Returns FAIL if an
error occurred before the wksp was modified and CORRUPT if an error
occurred after.  On failure, the restore state is indeterminate.
503 : Uses tpool threads [t0,t1) to do the restore. Assumes the caller is
504 : thread t0 and threads (t0,t1) are available for dispatch. */
505 :
static int
fd_wksp_private_restore_v2_mmio( fd_tpool_t *   tpool,
                                 ulong          t0,
                                 ulong          t1,
                                 fd_wksp_t *    wksp,
                                 fd_restore_t * restore,
                                 uint           new_seed ) {

  ulong frame_off; /* written by RESTORE_OPEN / RESTORE_CLOSE */

  int locked = 0; /* is the wksp currently locked? */
  int dirty  = 0; /* has the wksp been modified? */

  /* Restore and validate the header, info, and footer.  In principle
     this could be parallelized but probably not worth it. */

  ulong restore_sz = fd_restore_sz( restore );

  /* Header is at offset 0, info immediately follows it, footer is the
     last sizeof(ftr) bytes. */

  ulong frame_off_hdr  = 0UL;
  ulong frame_off_info = frame_off_hdr + sizeof(fd_wksp_checkpt_v2_hdr_t);
  ulong frame_off_ftr  = restore_sz - sizeof(fd_wksp_checkpt_v2_ftr_t);

  RESTORE_TEST( /*(0UL<=frame_off_hdr) &*/ (frame_off_hdr<frame_off_info) & (frame_off_info<frame_off_ftr) & (frame_off_ftr<restore_sz) );

  fd_wksp_checkpt_v2_hdr_t hdr[1];

  //RESTORE_SEEK( frame_off_hdr );
  RESTORE_TEST( !fd_wksp_private_restore_v2_common( hdr, restore ) );

  FD_LOG_INFO(( "Restoring footer" ));

  fd_wksp_checkpt_v2_ftr_t ftr[1];

  RESTORE_SEEK( frame_off_ftr );
  RESTORE_TEST( !fd_wksp_restore_v2_ftr( restore, hdr, ftr, restore_sz ) );

  /* The footer locates the volumes frame, which must sit strictly
     between the info and footer frames. */

  ulong frame_off_volumes = ftr->frame_off;

  RESTORE_TEST( (frame_off_info<frame_off_volumes) & (frame_off_volumes<frame_off_ftr) );

  if( FD_UNLIKELY( ftr->alloc_cnt>wksp->part_max ) ) {
    FD_LOG_WARNING(( "restore failed because there are too few wksp partitions to restore allocations into "
                     "(ftr alloc_cnt %lu, hdr part_max %lu, wksp part_max %lu)",
                     ftr->alloc_cnt, hdr->part_max, wksp->part_max ));
    goto fail;
  }

  FD_LOG_INFO(( "Restoring volumes" ));

  fd_wksp_checkpt_v2_cmd_t cmd[1];

  RESTORE_SEEK( frame_off_volumes );
  RESTORE_OPEN( hdr->frame_style_compressed );
  RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
  RESTORE_CLOSE();

  /* A volumes cmd is encoded as tag 0 / cgroup_cnt ULONG_MAX; its
     frame_off points at the last volume's appendix (0 if no volumes). */

  RESTORE_TEST( (cmd->volumes.tag==0UL) & (cmd->volumes.cgroup_cnt==ULONG_MAX) ); /* frame_off_appendix tested below */
  RESTORE_TEST( (frame_off_volumes<frame_off) & (frame_off<=frame_off_ftr) );     /* ==frame_off_ftr if compactly stored */

  FD_LOG_INFO(( "Locking wksp" ));

  if( FD_UNLIKELY( fd_wksp_private_lock( wksp ) ) ) goto fail; /* logs details */
  locked = 1;

  /* For all volumes (walked newest to oldest via the appendix
     frame_off back-links) */

  ulong alloc_rem  = ftr->alloc_cnt;  /* Number of allocations remaining to process */
  ulong cgroup_rem = ftr->cgroup_cnt; /* Number of cgroups remaining to process */
  ulong volume_rem = ftr->volume_cnt; /* Number of volumes remaining to process */

  ulong frame_off_volume_lo = frame_off_info;
  ulong frame_off_volume_hi = frame_off_volumes;
  ulong frame_off_appendix  = cmd->volumes.frame_off;

  while( frame_off_appendix ) {

    /* Verify we still have volumes remaining and the appendix location
       is between the info frame and the next volume (or the footer if
       the last volume) */

    RESTORE_TEST( (volume_rem>0UL) & (frame_off_volume_lo<frame_off_appendix) & (frame_off_appendix<frame_off_volume_hi) );

    /* Now that we know where this volume's appendix is supposed to be,
       seek to it and then restore and validate it. */

    FD_LOG_INFO(( "Restoring volume appendix" ));

    RESTORE_SEEK( frame_off_appendix );

    ulong cgroup_frame_off[ FD_WKSP_RESTORE_V2_CGROUP_MAX+1UL ];
    ulong cgroup_pinfo_lo [ FD_WKSP_RESTORE_V2_CGROUP_MAX+1UL ];
    ulong cgroup_cnt;

    ulong frame_off_prev; /* previous volume's appendix (0 if this is the first volume) */

    {
      RESTORE_OPEN( hdr->frame_style_compressed );

      fd_wksp_checkpt_v2_cmd_t cmd[1];

      RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
      RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_appendix( cmd ) );

      cgroup_cnt     = cmd->appendix.cgroup_cnt;
      frame_off_prev = cmd->appendix.frame_off;

      if( FD_UNLIKELY( cgroup_cnt>FD_WKSP_RESTORE_V2_CGROUP_MAX ) ) {
        FD_LOG_WARNING(( "increase FD_WKSP_RESTORE_V2_CGROUP_MAX for this target" ));
        goto fail;
      }

      RESTORE_DATA( cgroup_frame_off, cgroup_cnt*sizeof(ulong) );
      RESTORE_DATA( cgroup_pinfo_lo,  cgroup_cnt*sizeof(ulong) ); /* cgroup_alloc_cnt now, pinfo cgroup partitioning later */
      RESTORE_CLOSE();

      /* Verify this cgroups frames are between the previous appendix frame
         (or the info frame if the first volume) and this appendix frame
         and ordered.  Also, verify the cgroup allocation counts,
         convert the counts into a partitioning of wksp's pinfo array
         and make sure we have enough partitions in the wksp to attempt
         the restore.  In principle, this loop could be parallelized but
         probably not worth it. */

      cgroup_frame_off[ cgroup_cnt ] = frame_off_appendix;
      cgroup_pinfo_lo [ cgroup_cnt ] = alloc_rem;

      for( ulong cgroup_rem=cgroup_cnt; cgroup_rem; cgroup_rem-- ) {

        ulong cgroup_idx = cgroup_rem - 1UL;
        RESTORE_TEST( cgroup_frame_off[ cgroup_idx ] < cgroup_frame_off[ cgroup_idx+1UL ] );

        /* Convert the alloc count into this cgroup's pinfo start index
           (working highest cgroup to lowest, ending at alloc_rem). */

        ulong cgroup_alloc_cnt = cgroup_pinfo_lo[ cgroup_idx ];
        RESTORE_TEST( cgroup_alloc_cnt<=alloc_rem );
        alloc_rem -= cgroup_alloc_cnt;
        cgroup_pinfo_lo[ cgroup_idx ] = alloc_rem;

      }

      RESTORE_TEST( fd_ulong_max( frame_off_prev, frame_off_info ) < cgroup_frame_off[0] );
    }

    /* At this point, we know how to do an embarassingly parallel
       restore directly into the wksp.  Dispatch work to tpool threads
       [t0,t1).  This assumes we are tpool thread t0 and threads (t0,t1)
       are available for dispatch.  On return from the dispatch, err
       will contain the error code from the lowest indexed cgroup_idx
       that encountered an error (if any error was encountered, some
       cgroups might not have been processed) and dirty_node will
       contain non-zero if the wksp was modified. */

    FD_LOG_INFO(( "Restoring volume cgroups" ));

    ulong cgroup_nxt[1]; /* shared task-queue counter for the dispatch */

    FD_COMPILER_MFENCE();
    FD_VOLATILE( cgroup_nxt[0] ) = 0UL;
    FD_COMPILER_MFENCE();

    int err;
    int dirty_node;
    fd_wksp_private_restore_v2_node( (void *)tpool, t0, t1,
                                     (void *)wksp, (void *)restore, (ulong)hdr, (ulong)cgroup_frame_off, (ulong)cgroup_pinfo_lo,
                                     (ulong)cgroup_nxt, cgroup_cnt, (ulong)&err, (ulong)&dirty_node );
    dirty |= dirty_node;
    if( FD_UNLIKELY( err ) ) goto fail;

    /* Advance to the next volume */

    cgroup_rem -= cgroup_cnt;
    volume_rem--;
    /* frame_off_volume_lo unchanged */
    frame_off_volume_hi = cgroup_frame_off[ 0 ];
    frame_off_appendix  = frame_off_prev;
  }

  /* Make sure we got all volumes and all cgroups and position the
     restore at the location it would have been at in a streaming
     restore. */

  RESTORE_TEST( alloc_rem ==0UL );
  RESTORE_TEST( cgroup_rem==0UL );
  RESTORE_TEST( volume_rem==0UL );

  RESTORE_SEEK( restore_sz );

  /* Free any remaining old allocations and rebuild the wksp with our
     freshly restored allocations.  In principle the free loop can be
     parallelized but it is probably not worth it. */

  FD_LOG_INFO(( "Rebuilding wksp" ));

  dirty = 1;

  fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
  ulong part_max = wksp->part_max;

  for( ulong part_idx=ftr->alloc_cnt; part_idx<part_max; part_idx++ ) pinfo[ part_idx ].tag = 0UL;

  if( FD_UNLIKELY( fd_wksp_rebuild( wksp, new_seed ) ) ) goto fail; /* logs details */

  FD_LOG_INFO(( "Unlocking wksp" ));

  fd_wksp_private_unlock( wksp );

  return FD_WKSP_SUCCESS;

fail: /* Release resources that might be reserved */

  if( FD_LIKELY( locked ) ) fd_wksp_private_unlock( wksp );

  /* CORRUPT if the wksp was already modified when the error hit,
     FAIL otherwise */

  return fd_int_if( dirty, FD_WKSP_ERR_CORRUPT, FD_WKSP_ERR_FAIL );
}
718 :
719 : /* fd_wksp_private_restore_v2_stream is identical to
720 : fd_wksp_private_restore_v2_mmio (above) but usable when restore is
721 : not using memory mapped i/o under the hood. This includes when the
722 : restore is from a non-seekable file descriptor (e.g. when the restore
723 : is from a pipe or socket but this will work fine if used on mmio
724 : restores too). Restore must be compactly stored. Exact same
725 : behaviors. */
726 :
727 : static int
728 : fd_wksp_private_restore_v2_stream( fd_wksp_t * wksp,
729 : fd_restore_t * restore,
730 0 : uint new_seed ) {
731 0 : ulong frame_off;
732 :
733 0 : int locked = 0; /* is the wksp currently locked */
734 0 : int dirty = 0; /* has the wksp been modified? */
735 :
736 0 : fd_wksp_checkpt_v2_hdr_t hdr[1];
737 :
738 0 : RESTORE_TEST( !fd_wksp_private_restore_v2_common( hdr, restore ) );
739 :
740 0 : FD_LOG_INFO(( "Locking wksp" ));
741 :
742 0 : if( FD_UNLIKELY( fd_wksp_private_lock( wksp ) ) ) goto fail; /* logs details */
743 0 : locked = 1;
744 :
745 0 : fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
746 0 : ulong part_max = wksp->part_max;
747 0 : ulong data_max = wksp->data_max;
748 0 : ulong data_lo = wksp->gaddr_lo;
749 0 : ulong data_hi = wksp->gaddr_hi;
750 :
751 0 : ulong hdr_data_max = hdr->data_max;
752 0 : ulong hdr_data_lo = fd_wksp_private_data_off( hdr->part_max );
753 0 : ulong hdr_data_hi = hdr_data_lo + hdr_data_max;
754 :
755 : /* For all volumes in the checkpt */
756 :
757 0 : ulong ftr_alloc_cnt = 0UL;
758 0 : ulong ftr_cgroup_cnt = 0UL;
759 0 : ulong ftr_volume_cnt = 0UL;
760 0 : ulong frame_off_prev = 0UL;
761 :
762 0 : for(;;) {
763 :
764 0 : FD_LOG_INFO(( "Restoring volume %lu", ftr_volume_cnt ));
765 :
766 0 : ulong vol_cgroup_cnt = 0UL;
767 :
768 0 : ulong vol_cgroup_frame_off[ FD_WKSP_RESTORE_V2_CGROUP_MAX ];
769 0 : ulong vol_cgroup_alloc_cnt[ FD_WKSP_RESTORE_V2_CGROUP_MAX ];
770 :
771 0 : ulong vol_appendix_frame_off[ FD_WKSP_RESTORE_V2_CGROUP_MAX ];
772 0 : ulong vol_appendix_alloc_cnt[ FD_WKSP_RESTORE_V2_CGROUP_MAX ];
773 :
774 : /* For all cgroups in the volume */
775 :
776 0 : for(;;) {
777 :
778 0 : ulong part_lo = ftr_alloc_cnt;
779 :
780 : /* Open the frame and read the leading command to determine if the
781 : frame is a cgroup, appendix (which ends the volume) or an end
782 : of volumes frame (which ends the checkpt). If it is an
783 : appendix, validate and close the frame and proceed to the next
784 : volume. If it is the end of volumes, validate and close the
785 : frame and proceed to footer processing. Otherwise, proceed to
786 : processing a cgroup frame. */
787 :
788 0 : RESTORE_OPEN( hdr->frame_style_compressed );
789 :
790 0 : fd_wksp_checkpt_v2_cmd_t cmd[1];
791 :
792 0 : RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
793 :
794 0 : if( FD_UNLIKELY( fd_wksp_checkpt_v2_cmd_is_appendix( cmd ) ) ) {
795 0 : RESTORE_TEST( cmd->appendix.frame_off==frame_off_prev );
796 0 : frame_off_prev = frame_off;
797 :
798 0 : RESTORE_DATA( vol_appendix_frame_off, vol_cgroup_cnt*sizeof(ulong) );
799 0 : RESTORE_DATA( vol_appendix_alloc_cnt, vol_cgroup_cnt*sizeof(ulong) );
800 0 : RESTORE_CLOSE();
801 :
802 0 : RESTORE_TEST( !memcmp( vol_appendix_frame_off, vol_cgroup_frame_off, vol_cgroup_cnt*sizeof(ulong) ) );
803 0 : RESTORE_TEST( !memcmp( vol_appendix_alloc_cnt, vol_cgroup_alloc_cnt, vol_cgroup_cnt*sizeof(ulong) ) );
804 :
805 0 : break;
806 0 : }
807 :
808 0 : if( FD_UNLIKELY( fd_wksp_checkpt_v2_cmd_is_volumes( cmd ) ) ) {
809 0 : RESTORE_TEST( cmd->volumes.frame_off==frame_off_prev );
810 0 : frame_off_prev = frame_off;
811 :
812 0 : RESTORE_CLOSE();
813 :
814 0 : goto restore_footer;
815 0 : }
816 :
817 : /* At this point, we have read the leading command of a cgroup frame.
818 : Restore the cgroup allocation metadata. */
819 :
820 0 : if( FD_UNLIKELY( vol_cgroup_cnt>=FD_WKSP_RESTORE_V2_CGROUP_MAX ) ) {
821 0 : FD_LOG_WARNING(( "increase FD_WKSP_RESTORE_V2_CGROUP_MAX" ));
822 0 : goto fail;
823 0 : }
824 :
825 0 : vol_cgroup_frame_off[ vol_cgroup_cnt ] = frame_off;
826 :
827 0 : for(;;) {
828 0 : if( FD_UNLIKELY( fd_wksp_checkpt_v2_cmd_is_data( cmd ) ) ) break;
829 0 : RESTORE_TEST( fd_wksp_checkpt_v2_cmd_is_meta( cmd ) );
830 :
831 0 : ulong tag = cmd->meta.tag; /* non-zero */
832 0 : ulong gaddr_lo = cmd->meta.gaddr_lo;
833 0 : ulong gaddr_hi = cmd->meta.gaddr_hi;
834 :
835 0 : RESTORE_TEST( (hdr_data_lo<=gaddr_lo) & (gaddr_lo<gaddr_hi) & (gaddr_hi<=hdr_data_hi) );
836 : /* Note: disjoint [gaddr_lo,gaddr_hi) tested on rebuild */
837 :
838 0 : if( FD_UNLIKELY( !((data_lo<=gaddr_lo) & (gaddr_hi<=data_hi)) ) ) {
839 0 : FD_LOG_WARNING(( "restore failed because checkpt allocation [0x%016lx,0x%016lx) tag %lu does not fit into the wksp "
840 0 : "data region [0x%016lx,0x%016lx) (hdr_data_max %lu, wksp_data_max %lu)",
841 0 : gaddr_lo, gaddr_hi, tag, data_lo, data_hi, hdr_data_max, data_max ));
842 0 : goto fail;
843 0 : }
844 :
845 0 : if( FD_UNLIKELY( ftr_alloc_cnt>=part_max ) ) {
846 0 : FD_LOG_WARNING(( "restore failed because there are too few wksp partitions to restore allocations into "
847 0 : "(alloc_cnt %lu, hdr_part_max %lu, wksp_part_max %lu)",
848 0 : ftr_alloc_cnt, hdr->part_max, wksp->part_max ));
849 0 : goto fail;
850 0 : }
851 :
852 0 : dirty = 1;
853 0 : pinfo[ ftr_alloc_cnt ].gaddr_lo = gaddr_lo;
854 0 : pinfo[ ftr_alloc_cnt ].gaddr_hi = gaddr_hi;
855 0 : pinfo[ ftr_alloc_cnt ].tag = tag;
856 0 : ftr_alloc_cnt++;
857 :
858 0 : RESTORE_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
859 0 : }
860 :
861 : /* At this point, we have restored all cgroup allocation metadata
862 : into the pinfo array at [part_lo,ftr_alloc_cnt). Restore the
863 : corresponding cgroup allocation data. */
864 :
865 0 : for( ulong part_idx=part_lo; part_idx<ftr_alloc_cnt; part_idx++ ) {
866 0 : ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
867 0 : ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
868 :
869 0 : dirty = 1;
870 0 : RESTORE_DATA( fd_wksp_laddr_fast( wksp, gaddr_lo ), gaddr_hi - gaddr_lo );
871 0 : }
872 :
873 : /* Close the cgroup frame */
874 :
875 0 : RESTORE_CLOSE();
876 :
877 : /* Update verification info */
878 :
879 0 : vol_cgroup_alloc_cnt[ vol_cgroup_cnt ] = ftr_alloc_cnt - part_lo;
880 0 : vol_cgroup_cnt++;
881 :
882 0 : }
883 :
884 : /* Update verification info */
885 :
886 0 : ftr_cgroup_cnt += vol_cgroup_cnt;
887 0 : ftr_volume_cnt++;
888 0 : }
889 :
890 0 : restore_footer:
891 :
892 : /* At this point, the checkpt is positioned at the start of the
893 : footer. Restore and validate it. Note that checkpt data has been
894 : fully decompressed into the wksp pinfo and data region but the wksp
895 : indexing structures have not been rebuilt. Further note that
896 : restoring the footer is pure validation. */
897 :
898 0 : FD_LOG_INFO(( "Restoring footer" ));
899 :
900 0 : fd_wksp_checkpt_v2_ftr_t ftr[1];
901 :
902 0 : RESTORE_TEST( !fd_wksp_restore_v2_ftr( restore, hdr, ftr, frame_off + sizeof(fd_wksp_checkpt_v2_ftr_t) ) );
903 :
904 0 : RESTORE_TEST( ftr->alloc_cnt ==ftr_alloc_cnt );
905 0 : RESTORE_TEST( ftr->cgroup_cnt==ftr_cgroup_cnt );
906 0 : RESTORE_TEST( ftr->volume_cnt==ftr_volume_cnt );
907 0 : RESTORE_TEST( ftr->frame_off ==frame_off_prev );
908 :
909 0 : FD_LOG_INFO(( "Rebuilding wksp" ));
910 :
911 : /* Free any remaining old allocations and rebuild the wksp with
912 : the freshly restored allocations */
913 :
914 0 : dirty = 1;
915 0 : for( ulong part_idx=ftr_alloc_cnt; part_idx<part_max; part_idx++ ) pinfo[ part_idx ].tag = 0UL;
916 :
917 0 : if( FD_UNLIKELY( fd_wksp_rebuild( wksp, new_seed ) ) ) goto fail; /* logs details */
918 :
919 0 : FD_LOG_INFO(( "Unlocking wksp" ));
920 :
921 0 : fd_wksp_private_unlock( wksp );
922 :
923 0 : return FD_WKSP_SUCCESS;
924 :
925 0 : fail: /* Release resources that might be reserved */
926 :
927 0 : if( FD_LIKELY( locked ) ) fd_wksp_private_unlock( wksp );
928 :
929 0 : return fd_int_if( dirty, FD_WKSP_ERR_CORRUPT, FD_WKSP_ERR_FAIL );
930 0 : }
931 :
/* fd_wksp_private_restore_v2 restores the v2-format checkpt file at
   path into wksp, reseeding the rebuilt wksp with new_seed.  It first
   tries a memory-mapped restore (which can use tpool threads [t0,t1)
   via fd_wksp_private_restore_v2_mmio); if the file does not support
   mmio, it falls back to a streaming restore (single threaded; tpool,
   t0 and t1 are not used on that path).  Returns FD_WKSP_SUCCESS (0)
   on success, the error from the mmio/stream restore helper when the
   restore itself fails partway through cleanup-free, and
   FD_WKSP_ERR_FAIL when setup (open / restore init) fails.  On the
   fail path, any frame left open, the restore object, the mmio
   mapping and the file descriptor are all released best-effort. */
 932 : int
 933 : fd_wksp_private_restore_v2( fd_tpool_t * tpool,
 934 : ulong t0,
 935 : ulong t1,
 936 : fd_wksp_t * wksp,
 937 : char const * path,
 938 63 : uint new_seed ) {
 939 :
 940 63 : FD_LOG_INFO(( "Restoring checkpt \"%s\" into wksp \"%s\" (seed %u)", path, wksp->name, new_seed ));
 941 :
/* Sentinel values below double as "was this resource acquired?" flags
   for the fail path: fd==-1 -> no file open, restore==NULL -> no
   restore object, mmio_sz==0 -> no mmio mapping. */
 942 63 : int fd = -1;
 943 63 : void const * mmio = NULL;
 944 63 : ulong mmio_sz = 0UL;
 945 63 : fd_restore_t * restore = NULL;
 946 :
 947 63 : fd_restore_t _restore[ 1 ];
 948 63 : uchar rbuf[ FD_RESTORE_RBUF_MIN ]; /* read buffer, only used by the streaming fallback */
 949 :
 950 63 : FD_LOG_INFO(( "Opening checkpt" ));
 951 :
 952 63 : fd = open( path, O_RDONLY, (mode_t)0 );
 953 63 : if( FD_UNLIKELY( fd==-1 ) ) {
 954 0 : FD_LOG_WARNING(( "open(\"%s\",O_RDONLY,0) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
 955 0 : goto fail;
 956 0 : }
 957 :
/* Prefer the mmio path when the fd can be memory mapped (e.g. a
   regular file); fall back to streaming otherwise (e.g. a pipe). */
 958 63 : int err = fd_io_mmio_init( fd, FD_IO_MMIO_MODE_READ_ONLY, &mmio, &mmio_sz );
 959 63 : if( FD_LIKELY( !err ) ) {
 960 :
 961 63 : FD_LOG_INFO(( "Restoring checkpt with mmio" ));
 962 :
 963 : /* FIXME: consider trimming off prefix / suffix here (i.e. scan for
 964 : MAGIC / ~MAGIC) */
 965 :
 966 63 : restore = fd_restore_init_mmio( _restore, mmio, mmio_sz ); /* logs details */
 967 63 : if( FD_UNLIKELY( !restore ) ) goto fail;
 968 :
 969 63 : err = fd_wksp_private_restore_v2_mmio( tpool, t0, t1, wksp, restore, new_seed ); /* logs details */
 970 63 : if( FD_UNLIKELY( err ) ) goto fail;
 971 :
 972 63 : } else {
 973 :
 974 0 : FD_LOG_INFO(( "\"%s\" does not appear to support mmio (%i-%s); restoring checkpt with streaming",
 975 0 : path, err, fd_io_strerror( err ) ));
 976 :
 977 : /* FIXME: consider trimming off prefix (i.e. scan for MAGIC) here */
 978 :
 979 0 : restore = fd_restore_init_stream( _restore, fd, rbuf, FD_RESTORE_RBUF_MIN ); /* logs details */
 980 0 : if( FD_UNLIKELY( !restore ) ) goto fail;
 981 :
 982 0 : err = fd_wksp_private_restore_v2_stream( wksp, restore, new_seed ); /* logs details */
 983 0 : if( FD_UNLIKELY( err ) ) goto fail;
 984 :
 985 0 : }
 986 :
/* Success path teardown.  Note err is zero here (both branches above
   jump to fail on a non-zero err), so the return below reports
   success.  Teardown problems are logged but do not fail the restore
   since the wksp contents were already restored. */
 987 63 : FD_LOG_INFO(( "Closing checkpt" ));
 988 :
 989 63 : if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
 990 0 : FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
 991 :
 992 63 : if( FD_LIKELY( mmio_sz ) ) fd_io_mmio_fini( mmio, mmio_sz );
 993 :
 994 63 : if( FD_UNLIKELY( close( fd ) ) )
 995 0 : FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
 996 :
 997 63 : return err;
 998 :
 999 0 : fail:
1000 :
/* Best-effort release of whatever was acquired before the failure.
   If the restore failed mid-frame, close the frame before fini
   (presumably required by the fd_restore API -- confirm). */
1001 0 : if( FD_LIKELY( restore ) ) {
1002 0 : if( FD_UNLIKELY( fd_restore_in_frame( restore ) ) && FD_UNLIKELY( fd_restore_close( restore ) ) )
1003 0 : FD_LOG_WARNING(( "fd_restore_close failed; attempting to continue" ));
1004 :
1005 0 : if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
1006 0 : FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
1007 0 : }
1008 :
1009 0 : if( FD_LIKELY( mmio_sz ) ) fd_io_mmio_fini( mmio, mmio_sz );
1010 :
1011 0 : if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
1012 0 : FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
1013 :
/* NOTE(review): setup failures and helper failures alike collapse to
   FD_WKSP_ERR_FAIL here; the finer grained err from the mmio/stream
   helper is not propagated on this path. */
1014 0 : return FD_WKSP_ERR_FAIL;
1015 63 : }
1016 :
/* fd_wksp_private_printf_v2 pretty prints the header and info
   metadata of the v2-format checkpt at path to the file descriptor
   out.  verbose<1 prints nothing; verbose>=1 prints the header magic,
   the checkpt creation info (wallclock, app, thread, host, cpu,
   group, tid, user) and the compressed frame style; verbose>=2 adds
   mode, path, binfo and uinfo.  verbose>=3 (per-partition detail) is
   not implemented; see the FIXME below.  Returns the accumulated
   number of bytes written by dprintf; a negative dprintf error is
   returned as-is via TRAP.

   NOTE(review): failures that jump to fail from open / restore init /
   RESTORE_TEST return the bytes accumulated so far (possibly 0, a
   non-negative value), so such failures are reported only via the
   log, not the return value -- confirm callers expect this. */
1017 : int
1018 : fd_wksp_private_printf_v2( int out,
1019 : char const * path,
1020 63 : int verbose ) {
1021 :
1022 63 : int ret = 0;
/* TRAP accumulates dprintf byte counts into ret and jumps to fail on
   a negative (error) result, preserving the error in ret. */
1023 117 : # define TRAP(x) do { int _err = (x); if( FD_UNLIKELY( _err<0 ) ) { ret = _err; goto fail; } ret += _err; } while(0)
1024 :
/* fd==-1 / restore==NULL flag unacquired resources for the fail path */
1025 63 : int fd = -1;
1026 63 : fd_restore_t * restore = NULL;
1027 :
1028 63 : fd_restore_t _restore[ 1 ];
1029 63 : uchar rbuf[ FD_RESTORE_RBUF_MIN ];
1030 :
1031 : /* Print the header and metadata */
1032 :
1033 63 : if( verbose>=1 ) {
1034 :
1035 : /* Open the restore (streaming; only the leading header and info
1036 : frames are read, so mmio is unnecessary) */
1037 63 : fd = open( path, O_RDONLY, (mode_t)0 );
1038 63 : if( FD_UNLIKELY( fd==-1 ) ) {
1039 0 : FD_LOG_WARNING(( "open(\"%s\",O_RDONLY,0) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
1040 0 : goto fail;
1041 0 : }
1042 :
1043 63 : restore = fd_restore_init_stream( _restore, fd, rbuf, FD_RESTORE_RBUF_MIN ); /* logs details */
1044 63 : if( FD_UNLIKELY( !restore ) ) goto fail;
1045 :
1046 : /* Restore the header */
1047 :
1048 63 : fd_wksp_checkpt_v2_hdr_t hdr[1];
1049 :
1050 63 : RESTORE_TEST( !fd_wksp_restore_v2_hdr( restore, hdr ) );
1051 :
1052 : /* Restore the info (cstr fields are unpacked into info_buf with
1053 : pointers to them returned in info_cstr) */
1054 63 : fd_wksp_checkpt_v2_info_t info[1];
1055 63 : char info_buf[ 65536 ];
1056 63 : char const * info_cstr[ 9 ];
1057 :
1058 63 : RESTORE_TEST( !fd_wksp_restore_v2_info( restore, hdr, info, info_buf, 65536UL, info_cstr ) );
1059 :
1060 63 : char info_wallclock[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
1061 63 : fd_log_wallclock_cstr( info->wallclock, info_wallclock );
1062 :
1063 : /* Pretty print the header and info */
1064 :
1065 63 : TRAP( dprintf( out,
1066 : //"\tstyle %-20i\n" /* verbose 0 info (already printed) */
1067 : //"\tname %s\n"
1068 : //"\tseed %-20u\n"
1069 : //"\tpart_max %-20lu\n"
1070 : //"\tdata_max %-20lu\n"
1071 63 : "\tmagic %016lx\n" /* verbose 1 info */
1072 63 : "\twallclock %-20li (%s)\n"
1073 63 : "\tapp %-20lu (%s)\n"
1074 63 : "\tthread %-20lu (%s)\n"
1075 63 : "\thost %-20lu (%s)\n"
1076 63 : "\tcpu %-20lu (%s)\n"
1077 63 : "\tgroup %-20lu (%s)\n"
1078 63 : "\ttid %-20lu\n"
1079 63 : "\tuser %-20lu (%s)\n"
1080 63 : "\tframe_style_compressed %-20i\n", /* (v2 specific) */
1081 63 : hdr->magic,
1082 63 : info->wallclock, info_wallclock,
1083 63 : info->app_id, info_cstr[0],
1084 63 : info->thread_id, info_cstr[1],
1085 63 : info->host_id, info_cstr[2],
1086 63 : info->cpu_id, info_cstr[3],
1087 63 : info->group_id, info_cstr[4],
1088 63 : info->tid,
1089 63 : info->user_id, info_cstr[5],
1090 63 : hdr->frame_style_compressed ) );
1091 :
1092 63 : if( verbose>=2 )
1093 54 : TRAP( dprintf( out, "\tmode %03lo\n" /* (v2 specific) */
1094 63 : "\tpath\n\t\t%s\n" /* (v2 specific) */
1095 63 : "\tbinfo\n\t\t%s\n"
1096 63 : "\tuinfo\n\t\t%s\n",
1097 63 : info->mode, info_cstr[6], info_cstr[7], info_cstr[8] ) );
1098 :
1099 : /* FIXME: consider implementing handling of verbose>=3. Since data in a
1100 : compressed frame can't be easily skipped over (due to sequential
1101 : dependencies between compressed data bufs inherently induced by
1102 : compression algos), we would:
1103 :
1104 : Use stat to get the size of the checkpt, seek to the end of the
1105 : file and restore the footer frame to get the appendix frame
1106 : location, seek to the appendix frame, and restore it to get the
1107 : cgroup frame offsets and partition counts. Then, for each cgroup,
1108 : seek to the cgroup frame, init a streaming restore, open the frame,
1109 : restore the partition count and partition metadata (which is
1110 : conveniently located at the start of a cgroup frame), close it and
1111 : fini the restore. Omitting for now as this isn't particularly
1112 : important functionality. */
1113 :
1114 : /* Finish restoring (teardown problems logged but not fatal since
1115 : the printing already completed) */
1116 63 : if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
1117 0 : FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
1118 :
1119 63 : if( FD_UNLIKELY( close( fd ) ) )
1120 0 : FD_LOG_WARNING(( "close failed (%i-%s); attempting to continue", errno, fd_io_strerror( errno ) ));
1121 63 : }
1122 :
1123 63 : # undef TRAP
1124 :
1125 63 : return ret;
1126 :
1127 0 : fail: /* Release resources that might be reserved */
1128 :
/* If the restore failed mid-frame, close the frame before fini
   (presumably required by the fd_restore API -- confirm) */
1129 0 : if( FD_LIKELY( restore ) ) {
1130 0 : if( FD_UNLIKELY( fd_restore_in_frame( restore ) ) && FD_UNLIKELY( fd_restore_close( restore ) ) )
1131 0 : FD_LOG_WARNING(( "fd_restore_close failed; attempting to continue" ));
1132 :
1133 0 : if( FD_UNLIKELY( !fd_restore_fini( restore ) ) ) /* logs details */
1134 0 : FD_LOG_WARNING(( "fd_restore_fini failed; attempting to continue" ));
1135 0 : }
1136 :
1137 0 : if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
1138 0 : FD_LOG_WARNING(( "close failed (%i-%s); attempting to continue", errno, fd_io_strerror( errno ) ));
1139 :
1140 0 : return ret;
1141 63 : }
1142 :
1143 : #undef RESTORE_TEST
1144 : #undef RESTORE_DATA
1145 : #undef RESTORE_META
1146 : #undef RESTORE_CLOSE
1147 : #undef RESTORE_OPEN
1148 : #undef RESTORE_SEEK
|