Line data Source code
1 : #include "fd_wksp_private.h"
2 :
3 : #include <errno.h>
4 : #include <unistd.h>
5 : #include <fcntl.h>
6 : #include <sys/stat.h>
7 :
8 : /* This is an implementation detail and not strictly part of the v2
9 : specification. */
10 :
11 36 : #define FD_WKSP_CHECKPT_V2_CGROUP_MAX (1024UL)
12 :
 : /* fd_wksp_private_checkpt_v2 checkpoints wksp to a newly created file
 :    at path (created O_CREAT|O_EXCL with the given permission mode)
 :    using the v2 checkpt format.  uinfo is an arbitrary user cstr
 :    recorded in the checkpt's info frame and frame_style_compressed
 :    selects the fd_checkpt frame style used for compressible frames.
 :    Returns FD_WKSP_SUCCESS (zero) on success and an FD_WKSP_ERR_*
 :    code on failure (logs details).  On failure, acquired resources
 :    (wksp lock, file descriptor, checkpt stream) are released but any
 :    partially written file at path is not unlinked.  tpool/t0/t1 are
 :    currently unused placeholders for future thread parallelization. */
13 : int
14 : fd_wksp_private_checkpt_v2( fd_tpool_t * tpool,
15 : ulong t0,
16 : ulong t1,
17 : fd_wksp_t * wksp,
18 : char const * path,
19 : ulong mode,
20 : char const * uinfo,
21 36 : int frame_style_compressed ) {
22 :
23 36 : (void)tpool; (void)t0; (void)t1; /* Thread parallelization not currently implemented */
24 :
25 36 : char const * binfo = fd_log_build_info;
26 :
27 36 : if( FD_UNLIKELY( !fd_checkpt_frame_style_is_supported( frame_style_compressed ) ) ) {
28 0 : FD_LOG_WARNING(( "compressed frames are not supported on this target" ));
29 0 : return FD_WKSP_ERR_INVAL;
30 0 : }
31 :
 : /* err_fail is the error code returned via the fail path below.
 :    locked / fd / checkpt track which resources have been acquired so
 :    far so that the fail path releases exactly those. */
 :
32 36 : int err_fail;
33 :
34 36 : int locked = 0;
35 36 : int fd = -1;
36 36 : fd_checkpt_t * checkpt = NULL;
37 :
38 36 : fd_wksp_private_pinfo_t * pinfo = fd_wksp_private_pinfo( wksp );
39 :
40 36 : char const * name = wksp->name;
41 36 : ulong name_len = fd_shmem_name_len( name );
42 36 : if( FD_UNLIKELY( !name_len ) ) {
43 0 : FD_LOG_WARNING(( "checkpt wksp to \"%s\" failed due to bad name; attempting to continue", path ));
44 0 : err_fail = FD_WKSP_ERR_CORRUPT;
45 0 : goto fail;
46 0 : }
47 :
48 : /* Lock the wksp */
49 :
50 36 : {
51 36 : int _err = fd_wksp_private_lock( wksp ); /* logs details */
52 36 : if( FD_UNLIKELY( _err ) ) {
53 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed due to being locked; attempting to continue", name, path ));
54 0 : err_fail = _err;
55 0 : goto fail;
56 0 : }
57 36 : locked = 1;
58 36 : }
59 :
60 : /* Determine a reasonable number of cgroups (note: in principle we
61 : could thread parallelize this but it probably isn't worth the extra
62 : complexity). */
63 :
64 0 : ulong cgroup_cnt;
65 36 : ulong alloc_cnt = 0UL;
66 :
67 36 : {
 : /* WKSP_TEST validates a wksp invariant; on failure, it logs, sets
 :    err_fail to FD_WKSP_ERR_CORRUPT and jumps to the fail path. */
68 378 : # define WKSP_TEST( c ) do { \
69 378 : if( FD_UNLIKELY( !(c) ) ) { \
70 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed due to failing test %s; attempting to continue", \
71 0 : name, path, #c )); \
72 0 : err_fail = FD_WKSP_ERR_CORRUPT; \
73 0 : goto fail; \
74 0 : } \
75 378 : } while(0)
76 :
77 : /* Count the number of allocations by traversing over all partitions
78 : in reverse order by gaddr_lo (same iteration we will do to assign
79 : partitions to cgroups), validating as we go. */
80 :
81 36 : ulong part_max = wksp->part_max;
82 36 : ulong data_lo = wksp->gaddr_lo;
83 36 : ulong data_hi = wksp->gaddr_hi;
84 36 : ulong cycle_tag = wksp->cycle_tag++;
85 :
86 36 : WKSP_TEST( (0UL<data_lo) & (data_lo<=data_hi) ); /* Valid data region */
87 :
88 36 : ulong gaddr_last = data_hi;
89 :
90 36 : ulong part_idx = fd_wksp_private_pinfo_idx( wksp->part_tail_cidx );
91 138 : while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
92 :
93 : /* Load partition metadata and validate it */
94 :
95 102 : WKSP_TEST( part_idx<part_max ); /* Valid idx */
96 102 : WKSP_TEST( pinfo[ part_idx ].cycle_tag!=cycle_tag ); /* No cycles */
97 102 : pinfo[ part_idx ].cycle_tag = cycle_tag; /* Mark part_idx as visited */
98 :
99 102 : ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
100 102 : ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
101 102 : ulong tag = pinfo[ part_idx ].tag;
102 :
103 102 : WKSP_TEST( (data_lo<=gaddr_lo) & (gaddr_lo<gaddr_hi) & (gaddr_hi==gaddr_last) ); /* Valid partition range */
104 102 : gaddr_last = gaddr_lo;
105 :
106 : /* If this partition holds an allocation, count it */
107 :
108 102 : alloc_cnt += (ulong)(tag>0UL);
109 :
110 : /* Advance to the previous partition */
111 :
112 102 : part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].prev_cidx );
113 102 : }
114 :
115 36 : WKSP_TEST( gaddr_last==data_lo ); /* Complete partitioning */
116 :
117 : /* Compute a reasonable cgroup_cnt for alloc_cnt. To do this,
118 : let N be the number of allocations. We assume they have IID
119 : sizes with mean U and standard deviation S. If we assign each
120 : allocation to 1 of M cgroups IID uniform random (we will do
121 : better below but we pessimize here), in the limit of N>>M>>1, a
122 : cgroup's load (total number of allocation bytes assigned to a
123 : cgroup to compress) is Gaussian (by central limit theorem) with
124 : mean (N/M)U with standard deviation sqrt(N/M) sqrt(U^2+S^2).
125 :
126 : That is, we are load balanced on average (yay) but there is some
127 : natural imbalance expected due to statistical fluctuations (boo).
128 : Noting that allocation sizes are positive, if we further assume
129 : that S<~U typically (note that it is theoretically possible to
130 : have a positive valued random variable with S arbitrarily larger
131 : than U), then the cgroup load standard deviation is typically
132 : less than sqrt(2N/M) U.
133 :
134 : The load for each cgroup will be approximately independent of
135 : each other for M>>1. Extremal value statistics for a Gaussian
136 : then implies that the least loaded cgroup is typically likely to
137 : have more than (N/M)U - sqrt(2N/M) U sqrt(2 ln M) load. We want
138 : this to be positive such that the least loaded cgroup will
139 : typically have some load:
140 :
141 : (N/M)U >> sqrt((4 N ln M)/M))U
142 : -> (N/M) >> 4 ln M
143 :
144 : That is, we want to pick the number of cgroups such that the number
145 : of allocations per cgroup on average much greater than a few
146 : times the natural log of the number of cgroups.
147 :
148 : Given the number of cgroups is at most CGROUP_MAX ~ 1024, the
149 : above implies if we target more than ~28 allocations per cgroup
150 : on average, each cgroup is likely to get some load and cgroups
151 : will be reasonably load balanced on average. We use 32 below for
152 : computational convenience. */
153 :
154 36 : cgroup_cnt = fd_ulong_min( (alloc_cnt+31UL)/32UL, FD_WKSP_CHECKPT_V2_CGROUP_MAX );
155 :
156 36 : # undef WKSP_TEST
157 36 : }
158 :
159 : /* Assign allocations to cgroups (note: in principle we could thread
160 : parallelize this but it also probably isn't worth the extra
161 : complexity). */
162 :
163 0 : uint cgroup_head_cidx[ FD_WKSP_CHECKPT_V2_CGROUP_MAX ]; /* Head of a linked list for partitions assigned to each cgroup */
164 36 : ulong cgroup_alloc_cnt[ FD_WKSP_CHECKPT_V2_CGROUP_MAX ]; /* Number of partitions in each cgroup */
165 :
166 36 : {
167 :
168 : /* Initialize the cgroups to empty */
169 :
170 36 : ulong cgroup_load[ FD_WKSP_CHECKPT_V2_CGROUP_MAX ];
171 :
172 36 : uint null_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
173 51 : for( ulong cgroup_idx=0UL; cgroup_idx<cgroup_cnt; cgroup_idx++ ) {
174 15 : cgroup_head_cidx[ cgroup_idx ] = null_cidx;
175 15 : cgroup_alloc_cnt[ cgroup_idx ] = 0UL;
176 15 : cgroup_load [ cgroup_idx ] = 0UL;
177 15 : }
178 :
179 : /* Configure cgroup sampling */
180 :
181 36 : ulong cgroup_cursor = 0UL;
182 36 : ulong cgroup_idx = 0UL;
183 :
184 : /* For all partitions in reverse order by gaddr_lo */
185 :
186 36 : ulong part_idx = fd_wksp_private_pinfo_idx( wksp->part_tail_cidx );
187 138 : while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
188 :
189 : /* Load partition metadata */
190 :
191 102 : ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
192 102 : ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
193 102 : ulong tag = pinfo[ part_idx ].tag;
194 :
195 : /* If this partition holds an allocation, deterministically assign
196 : it to a cgroup in an approximately load balanced way such that
197 : the assignments will be identical for the same set of
198 : allocations and cgroup_cnt. */
199 :
200 102 : if( tag ) { /* ~50/50 */
201 :
202 : /* Sample a handful of cgroups and pick the least loaded to
203 : approximate a greedy load balance method. We consider the
204 : most recently assigned cgroup (which was thought to be
205 : lightly loaded at the previous assignment), a cyclically
206 : sampled cgroup (ala striping) and two pseudo-randomly sampled
207 : cgroups based on the common hash of gaddr_lo (ala random
208 : assignment). We don't care if our samples collide; we are
209 : just trying to improve on load balance over straight striping
210 : and random sampling (both of which are already asymptotically
211 : load balanced as per the above).
212 :
213 : We could use a min-heap here but that would be
214 : algorithmically more expensive, more complex to implement and
215 : unlikely to improve load balance much further (it would be
216 : the greedy load balance method, which is also asymptotically
217 : optimal but not perfect ... perfect load balance is a
218 : computationally hard knapsack like problem but pretty good
219 : load balance is easy). */
220 :
221 51 : {
222 51 : ulong h = fd_ulong_hash( gaddr_lo );
223 :
 : /* Branchless 4-way min: reduce the 4 (idx,load) candidate pairs
 :    to the least loaded candidate in (i0,l0). */
224 51 : ulong i0 = cgroup_idx; ulong l0 = cgroup_load[ i0 ];
225 51 : ulong i1 = cgroup_cursor; ulong l1 = cgroup_load[ i1 ];
226 51 : ulong i2 = h % cgroup_cnt; ulong l2 = cgroup_load[ i2 ];
227 51 : ulong i3 = (h >> 32) % cgroup_cnt; ulong l3 = cgroup_load[ i3 ];
228 :
229 51 : i0 = fd_ulong_if( l0<=l1, i0, i1 ); l0 = fd_ulong_min( l0, l1 );
230 51 : i1 = fd_ulong_if( l2<=l3, i2, i3 ); l1 = fd_ulong_min( l2, l3 );
231 51 : i0 = fd_ulong_if( l0<=l1, i0, i1 ); l0 = fd_ulong_min( l0, l1 );
232 :
233 51 : cgroup_cursor = fd_ulong_if( cgroup_cursor<cgroup_cnt-1UL, cgroup_cursor+1UL, 0UL );
234 51 : cgroup_idx = i0;
235 51 : }
236 :
237 : /* Update this cgroup's partition count and load. The load is
238 : currently the total uncompressed bytes of partition metadata
239 : and data (TODO: consider adding a fixed base cost here to
240 : account for fixed computational overheads too. This would be
241 : an order of magnitude ballpark of the cost of doing 2
242 : fd_checkpt_buf relative to the marginal cost of checkpointing
243 : an additional byte for some representative target ... note
244 : that specific target details should not be incorporated into
245 : this because then specific checkpt byte stream would be
246 : sensitive to who wrote the checkpt and ideally checkpt should
247 : be bit-for-bit identical for identical wksp regardless of the
248 : target details). */
249 :
250 51 : cgroup_alloc_cnt[ cgroup_idx ]++;
251 51 : cgroup_load [ cgroup_idx ] += 3UL*sizeof(ulong) + (gaddr_hi - gaddr_lo);
252 :
253 : /* Push this partition onto the cgroup's stack. Since we are
254 : iterating over partitions in reverse order by gaddr_lo, the
255 : stack for each cgroup can be treated as a linked list in
256 : sorted order by gaddr_lo (helps with metadata
257 : compressibility). */
258 :
259 51 : pinfo[ part_idx ].stack_cidx = cgroup_head_cidx[ cgroup_idx ];
260 51 : cgroup_head_cidx[ cgroup_idx ] = fd_wksp_private_pinfo_cidx( part_idx );
261 51 : }
262 :
263 : /* Advance to the previous partition */
264 :
265 102 : part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].prev_cidx );
266 102 : }
267 36 : }
268 :
269 : /* At this point, each wksp partitions to checkpt have been assigned
270 : to a cgroup, the cgroups are approximately load balanced and the
271 : partitions for each cgroup are given in a singly linked list sorted
272 : in ascending order by gaddr_lo. */
273 :
274 : /* Create the checkpt file */
275 :
276 36 : {
 : /* Temporarily clear the umask so the file is created with exactly
 :    the requested mode bits. */
277 36 : mode_t old_mask = umask( (mode_t)0 );
278 36 : fd = open( path, O_CREAT|O_EXCL|O_WRONLY, (mode_t)mode );
279 36 : umask( old_mask );
280 36 : if( FD_UNLIKELY( fd==-1 ) ) {
281 9 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed opening file with flags_O_CREAT|O_EXCL|O_WRONLY in mode 0%03lo "
282 9 : "(%i-%s); attempting to continue", name, path, mode, errno, fd_io_strerror( errno ) ));
283 9 : err_fail = FD_WKSP_ERR_FAIL;
284 9 : goto fail;
285 9 : }
286 36 : }
287 :
288 : /* Initialize the checkpt */
289 :
 : /* frame_off[ i ] holds the file offset where frame i starts; this
 :    checkpt writes cgroup_cnt+5 frames (hdr, info, cgroup_cnt cgroup
 :    frames, appendix, volumes, ftr) and CHECKPT_CLOSE also records the
 :    offset just past the most recently closed frame, hence the
 :    CGROUP_MAX+6 sizing. */
290 27 : ulong frame_off[ FD_WKSP_CHECKPT_V2_CGROUP_MAX+6UL ];
291 27 : ulong frame_cnt = 0UL;
292 :
293 27 : fd_checkpt_t _checkpt[ 1 ];
294 27 : uchar wbuf[ FD_CHECKPT_WBUF_MIN ];
295 :
296 27 : checkpt = fd_checkpt_init_stream( _checkpt, fd, wbuf, FD_CHECKPT_WBUF_MIN ); /* logs details */
297 27 : if( FD_UNLIKELY( !checkpt ) ) {
298 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when initializing; attempting to continue", name, path ));
299 0 : err_fail = FD_WKSP_ERR_FAIL;
300 0 : goto fail;
301 0 : }
302 :
 : /* The CHECKPT_* macros below wrap the fd_checkpt frame/meta/data APIs;
 :    on error they log, set err_fail and goto fail (they assume name,
 :    path, checkpt, frame_off, frame_cnt and err_fail are in scope). */
303 147 : # define CHECKPT_OPEN(frame_style) do { \
304 147 : int _err = fd_checkpt_open_advanced( checkpt, (frame_style), &frame_off[ frame_cnt ] ); \
305 147 : if( FD_UNLIKELY( _err ) ) { \
306 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when opening a %s frame (%i-%s); attempting to continue", \
307 0 : name, path, #frame_style, _err, fd_checkpt_strerror( _err ) )); \
308 0 : err_fail = FD_WKSP_ERR_FAIL; \
309 0 : goto fail; \
310 0 : } \
311 147 : } while(0)
312 :
313 147 : # define CHECKPT_CLOSE() do { \
314 147 : frame_cnt++; \
315 147 : int _err = fd_checkpt_close_advanced( checkpt, &frame_off[ frame_cnt ] ); /* logs details */ \
316 147 : if( FD_UNLIKELY( _err ) ) { \
317 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when closing a frame (%i-%s); attempting to continue", \
318 0 : name, path, _err, fd_checkpt_strerror( _err ) )); \
319 0 : err_fail = FD_WKSP_ERR_FAIL; \
320 0 : goto fail; \
321 0 : } \
322 147 : } while(0)
323 :
324 : /* Note: sz must be at most FD_CHECKPT_META_MAX */
325 105 : # define CHECKPT_META( meta, sz ) do { \
326 105 : ulong _sz = (sz); \
327 105 : int _err = fd_checkpt_meta( checkpt, (meta), _sz ); /* logs details */ \
328 105 : if( FD_UNLIKELY( _err ) ) { \
329 0 : FD_LOG_WARNING(( "checkpt to \"%s\" failed when writing %lu bytes metadata %s (%i-%s); attempting to continue", \
330 0 : path, _sz, #meta, _err, fd_checkpt_strerror( _err ) )); \
331 0 : err_fail = FD_WKSP_ERR_FAIL; \
332 0 : goto fail; \
333 0 : } \
334 105 : } while(0)
335 :
336 : /* Note: data must exist and be unchanged until frame close */
337 201 : # define CHECKPT_DATA( data, sz ) do { \
338 201 : ulong _sz = (sz); \
339 201 : int _err = fd_checkpt_data( checkpt, (data), _sz ); /* logs details */ \
340 201 : if( FD_UNLIKELY( _err ) ) { \
341 0 : FD_LOG_WARNING(( "checkpt to \"%s\" failed when writing %lu bytes data %s (%i-%s); attempting to continue", \
342 0 : path, _sz, #data, _err, fd_checkpt_strerror( _err ) )); \
343 0 : err_fail = FD_WKSP_ERR_FAIL; \
344 0 : goto fail; \
345 0 : } \
346 201 : } while(0)
347 :
348 : /* Checkpt the header */
349 :
350 27 : {
351 27 : fd_wksp_checkpt_v2_hdr_t hdr[1];
352 :
353 27 : hdr->magic = wksp->magic;
354 27 : hdr->style = FD_WKSP_CHECKPT_STYLE_V2;
355 27 : hdr->frame_style_compressed = frame_style_compressed;
356 27 : hdr->reserved = 0U;
357 27 : memset( hdr->name, 0, FD_SHMEM_NAME_MAX ); /* Make sure trailing zeros clear */
358 27 : memcpy( hdr->name, name, name_len );
359 27 : hdr->seed = wksp->seed;
360 27 : hdr->part_max = wksp->part_max;
361 27 : hdr->data_max = wksp->data_max;
362 :
363 27 : CHECKPT_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
364 27 : CHECKPT_DATA( hdr, sizeof(fd_wksp_checkpt_v2_hdr_t) );
365 27 : CHECKPT_CLOSE();
366 27 : }
367 :
368 : /* Checkpt the info */
369 :
370 27 : {
371 27 : fd_wksp_checkpt_v2_info_t info[1];
372 27 : char buf[ 65536 ];
373 27 : char * p = buf;
374 :
375 27 : info->mode = mode;
376 27 : info->wallclock = fd_log_wallclock();
377 27 : info->app_id = fd_log_app_id ();
378 27 : info->thread_id = fd_log_thread_id();
379 27 : info->host_id = fd_log_host_id ();
380 27 : info->cpu_id = fd_log_cpu_id ();
381 27 : info->group_id = fd_log_group_id ();
382 27 : info->tid = fd_log_tid ();
383 27 : info->user_id = fd_log_user_id ();
384 :
 : /* APPEND_CSTR appends cstr (with its '\0') to the cursor p and
 :    records the appended size (len+1) in info->sz_field.  Note: the
 :    per-field size bounds in the comments below keep the total well
 :    under sizeof(buf). */
385 243 : # define APPEND_CSTR( field, cstr, len ) do { \
386 243 : ulong _len = (len); \
387 243 : memcpy( p, (cstr), _len ); \
388 243 : p[ _len ] = '\0'; \
389 243 : info->sz_##field = _len + 1UL; \
390 243 : p += info->sz_##field; \
391 243 : } while(0)
392 :
393 27 : APPEND_CSTR( app, fd_log_app(), strlen( fd_log_app() ) ); /* appends at most FD_LOG_NAME_MAX ~ 40 B */
394 27 : APPEND_CSTR( thread, fd_log_thread(), strlen( fd_log_thread() ) ); /* " */
395 27 : APPEND_CSTR( host, fd_log_host(), strlen( fd_log_host() ) ); /* " */
396 27 : APPEND_CSTR( cpu, fd_log_cpu(), strlen( fd_log_cpu() ) ); /* " */
397 27 : APPEND_CSTR( group, fd_log_group(), strlen( fd_log_group() ) ); /* " */
398 27 : APPEND_CSTR( user, fd_log_user(), strlen( fd_log_user() ) ); /* " */
399 27 : APPEND_CSTR( path, path, strlen( path ) ); /* appends at most PATH_MAX-1 ~ 4 KiB */
400 27 : APPEND_CSTR( binfo, binfo, fd_cstr_nlen( binfo, FD_WKSP_CHECKPT_V2_BINFO_MAX-1UL ) ); /* appends at most 16 KiB */
401 27 : APPEND_CSTR( uinfo, uinfo, fd_cstr_nlen( uinfo, FD_WKSP_CHECKPT_V2_UINFO_MAX-1UL ) ); /* " */
402 :
403 27 : # undef APPEND_CSTR
404 :
405 : /* Write the info */
406 :
407 27 : CHECKPT_OPEN( frame_style_compressed );
408 27 : CHECKPT_DATA( info, sizeof(fd_wksp_checkpt_v2_info_t) );
409 27 : CHECKPT_DATA( buf, (ulong)(p-buf) );
410 27 : CHECKPT_CLOSE();
411 27 : }
412 :
413 : /* Checkpt the volume cgroups. Note: This implementation just
414 : checkpoints 1 volume with at most CGROUP_MAX cgroup_cnt groups.
415 :
416 : Note: this loop can be parallelized over multiple threads if
417 : willing to leave holes in the file (and then maybe do a second pass
418 : to compact the holes or maybe do a planning pass and then a real
419 : pass or maybe leave the holes and do a second pass of run length
420 : and entropy coding or maybe write to separate files and distribute
421 : as a multiple files or maybe use non-POSIX filesystem mojo to
422 : stitch together the separate files to appear as one file or ...) */
423 :
424 39 : for( ulong cgroup_idx=0UL; cgroup_idx<cgroup_cnt; cgroup_idx++ ) {
425 :
426 12 : CHECKPT_OPEN( frame_style_compressed );
427 :
428 : /* Write cgroup commands */
429 :
430 12 : fd_wksp_checkpt_v2_cmd_t cmd[1];
431 :
432 12 : ulong part_idx = fd_wksp_private_pinfo_idx( cgroup_head_cidx[ cgroup_idx ] );
433 51 : while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
434 :
435 : /* Command: "meta (tag,gaddr_lo,gaddr_hi)" */
436 :
437 39 : cmd->meta.tag = pinfo[ part_idx ].tag; /* Note: non-zero */
438 39 : cmd->meta.gaddr_lo = pinfo[ part_idx ].gaddr_lo;
439 39 : cmd->meta.gaddr_hi = pinfo[ part_idx ].gaddr_hi;
440 :
441 39 : CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
442 :
443 39 : part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].stack_cidx );
444 39 : }
445 :
446 : /* Command: "corresponding data follows" */
447 :
448 12 : cmd->data.tag = 0UL;
449 12 : cmd->data.cgroup_cnt = ULONG_MAX;
450 12 : cmd->data.frame_off = ULONG_MAX;
451 :
452 12 : CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
453 :
454 : /* Write cgroup partition data */
455 :
 : /* Second pass over the same linked list, in the same order as the
 :    meta commands above, so data records pair with their metadata. */
456 12 : part_idx = fd_wksp_private_pinfo_idx( cgroup_head_cidx[ cgroup_idx ] );
457 51 : while( !fd_wksp_private_pinfo_idx_is_null( part_idx ) ) {
458 39 : ulong gaddr_lo = pinfo[ part_idx ].gaddr_lo;
459 39 : ulong gaddr_hi = pinfo[ part_idx ].gaddr_hi;
460 :
461 39 : CHECKPT_DATA( fd_wksp_laddr_fast( wksp, gaddr_lo ), gaddr_hi - gaddr_lo );
462 :
463 39 : part_idx = fd_wksp_private_pinfo_idx( pinfo[ part_idx ].stack_cidx );
464 39 : }
465 :
466 12 : CHECKPT_CLOSE();
467 :
468 12 : }
469 :
470 : /* Checkpt the volume appendix. This starts with a command that
471 : indicates this frame is an appendix for cgroup_cnt cgroups (this
472 : can be used in a streaming restore to tell when it has reached the
473 : appendix and in a parallel restore of the appendix so a parallel
474 : restore thread knows how much it needs to decompress), the offsets
475 : of each cgroup frame (so parallel restore threads can seek to the
476 : partitions assigned to them) and the number of partitions in each
477 : cgroup frame (so that the pinfo on restore can be partitioned over
478 : parallel restore threads upfront non-atomically and
479 : deterministically). */
480 :
481 27 : {
482 27 : fd_wksp_checkpt_v2_cmd_t cmd[1];
483 :
484 : /* Command: "appendix for a volume with cgroup_cnt frames and no
485 : previous volumes" */
486 :
487 27 : cmd->appendix.tag = 0UL;
488 27 : cmd->appendix.cgroup_cnt = cgroup_cnt;
489 27 : cmd->appendix.frame_off = 0UL;
490 :
491 27 : CHECKPT_OPEN( frame_style_compressed );
492 27 : CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) ); /* Note: must be meta for restore */
 : /* frame_off+2UL skips the hdr and info frame offsets so only the
 :    cgroup frame offsets are written. */
493 27 : CHECKPT_DATA( frame_off+2UL, cgroup_cnt*sizeof(ulong) );
494 27 : CHECKPT_DATA( cgroup_alloc_cnt, cgroup_cnt*sizeof(ulong) );
495 27 : CHECKPT_CLOSE();
496 27 : }
497 :
498 : /* Checkpt the volumes frame */
499 :
500 27 : {
501 27 : fd_wksp_checkpt_v2_cmd_t cmd[1];
502 :
503 : /* Command: "no more volumes" */
504 :
505 27 : cmd->volumes.tag = 0UL;
506 27 : cmd->volumes.cgroup_cnt = ULONG_MAX;
507 27 : cmd->volumes.frame_off = frame_off[ frame_cnt-1 ];
508 :
509 27 : CHECKPT_OPEN( frame_style_compressed );
510 27 : CHECKPT_META( cmd, sizeof(fd_wksp_checkpt_v2_cmd_t) );
511 27 : CHECKPT_CLOSE();
512 27 : }
513 :
514 : /* Checkpt the footer */
515 :
516 27 : {
517 27 : fd_wksp_checkpt_v2_ftr_t ftr[1];
518 :
519 : /* Command: "footer for a checkpt with cgroup_cnt total cgroup
520 : frames" */
521 :
522 27 : ftr->alloc_cnt = alloc_cnt;
523 27 : ftr->cgroup_cnt = cgroup_cnt;
524 27 : ftr->volume_cnt = 1UL;
525 27 : ftr->frame_off = frame_off[ frame_cnt-1U ];
 : /* Note: frame_off[ frame_cnt ] here is the offset where the footer
 :    frame itself will start (recorded by the volumes frame's close). */
526 27 : ftr->checkpt_sz = frame_off[ frame_cnt ] + sizeof(fd_wksp_checkpt_v2_ftr_t);
527 27 : ftr->data_max = wksp->data_max;
528 27 : ftr->part_max = wksp->part_max;
529 27 : ftr->seed = wksp->seed;
530 27 : memset( ftr->name, 0, FD_SHMEM_NAME_MAX ); /* Make sure trailing zeros clear */
531 27 : memcpy( ftr->name, name, name_len );
532 27 : ftr->reserved = 0U;
533 27 : ftr->frame_style_compressed = frame_style_compressed;
534 27 : ftr->style = FD_WKSP_CHECKPT_STYLE_V2;
535 27 : ftr->unmagic = ~wksp->magic;
536 :
537 27 : CHECKPT_OPEN( FD_CHECKPT_FRAME_STYLE_RAW );
538 27 : CHECKPT_DATA( ftr, sizeof(fd_wksp_checkpt_v2_ftr_t) );
539 27 : CHECKPT_CLOSE();
540 27 : }
541 :
542 27 : # undef CHECKPT_DATA
543 27 : # undef CHECKPT_META
544 27 : # undef CHECKPT_CLOSE
545 27 : # undef CHECKPT_OPEN
546 :
547 : /* Finalize the checkpt */
548 :
549 27 : if( FD_UNLIKELY( !fd_checkpt_fini( checkpt ) ) ) { /* logs details */
550 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when finalizing; attempting to continue", name, path ));
551 0 : checkpt = NULL;
552 0 : err_fail = FD_WKSP_ERR_FAIL;
553 0 : goto fail;
554 0 : }
555 :
556 : /* Close the file */
557 :
558 27 : if( FD_UNLIKELY( close( fd ) ) ) {
559 0 : FD_LOG_WARNING(( "checkpt wksp \"%s\" to \"%s\" failed when closing; attempting to continue", name, path ));
560 0 : fd = -1;
561 0 : err_fail = FD_WKSP_ERR_FAIL;
562 0 : goto fail;
563 0 : }
564 :
565 : /* Unlock the wksp */
566 :
567 27 : fd_wksp_private_unlock( wksp );
568 27 : locked = 0;
569 :
570 27 : return FD_WKSP_SUCCESS;
571 :
572 9 : fail:
573 :
574 : /* Release resources that might be reserved */
575 :
 : /* If a frame was left open by a failed CHECKPT_* call, close it
 :    before finalizing the checkpt stream. */
576 9 : if( FD_LIKELY( checkpt ) ) {
577 0 : if( FD_UNLIKELY( fd_checkpt_in_frame( checkpt ) ) && FD_UNLIKELY( fd_checkpt_close( checkpt ) ) )
578 0 : FD_LOG_WARNING(( "fd_checkpt_close failed; attempting to continue" ));
579 :
580 0 : if( FD_UNLIKELY( !fd_checkpt_fini( checkpt ) ) ) /* logs details */
581 0 : FD_LOG_WARNING(( "fd_checkpt_fini failed; attempting to continue" ));
582 0 : }
583 :
584 9 : if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
585 0 : FD_LOG_WARNING(( "close(\"%s\") failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
586 :
587 9 : if( FD_LIKELY( locked ) ) fd_wksp_private_unlock( wksp );
588 :
589 9 : return err_fail;
590 27 : }
|