Line data Source code
1 : /* This is included directly by fd_vinyl_recover.c */
2 :
3 : ulong
4 0 : fd_vinyl_recover_serial( fd_vinyl_t * vinyl ) {
5 :
6 : /* Iterate over the bstream's past to populate the meta. Note that
7 : our caller flushed the meta cache, data cache and reset the cache
8 : line eviction priorities to their default. */
9 :
10 0 : fd_vinyl_meta_t * meta = vinyl->meta;
11 0 : fd_vinyl_line_t * line = vinyl->line;
12 0 : fd_vinyl_io_t * io = vinyl->io;
13 :
14 0 : ulong line_cnt = vinyl->line_cnt;
15 0 : ulong pair_max = vinyl->pair_max;
16 :
17 0 : ulong io_seed = fd_vinyl_io_seed ( io );
18 0 : ulong seq_past = fd_vinyl_io_seq_past ( io );
19 0 : ulong seq_present = fd_vinyl_io_seq_present( io );
20 :
21 0 : fd_vinyl_meta_ele_t * ele0 = meta->ele;
22 0 : ulong ele_max = meta->ele_max;
23 0 : ulong meta_seed = meta->seed;
24 0 : ulong * lock = meta->lock;
25 0 : int lock_shift = meta->lock_shift;
26 :
27 0 : ulong seq = seq_past;
28 0 : ulong pair_cnt = 0UL;
29 0 : ulong garbage_sz = 0UL;
30 :
31 0 : while( fd_vinyl_seq_lt( seq, seq_present ) ) {
32 :
33 : /* At this point, we've recovered [seq_past,seq) and still need
34 : recover [seq,seq_present) (non-empty). Read the block at seq. */
35 :
36 0 : fd_vinyl_bstream_block_t block[1];
37 :
38 0 : fd_vinyl_io_read_imm( io, seq, block, FD_VINYL_BSTREAM_BLOCK_SZ );
39 :
40 0 : ulong ctl = block->ctl;
41 :
42 0 : int type = fd_vinyl_bstream_ctl_type( ctl );
43 :
44 0 : switch( type ) {
45 :
46 0 : case FD_VINYL_BSTREAM_CTL_TYPE_PAIR: {
47 :
48 : /* Notes:
49 :
50 : - It is okay if we are in a move (move block processing the
51 : previous iteration already confirmed this is the proper pair.
52 :
53 : - We could rewind the bstream to seq on truncation
54 : automatically but then we might have failed to recover the
55 : most recent pair and thus have recovered to a state that does
56 : not correspond to the bstream's past. We instead kick this
57 : to the user to decide if they want to discard an incompletely
58 : written pair or not. */
59 :
60 0 : ulong pair_val_esz = fd_vinyl_bstream_ctl_sz( ctl );
61 :
62 0 : ulong pair_sz = fd_vinyl_bstream_pair_sz( pair_val_esz );
63 :
64 0 : if( FD_UNLIKELY( pair_sz > (seq_present-seq) ) ) { /* Wrapping safe */
65 0 : FD_LOG_WARNING(( "%016lx: truncated", seq ));
66 0 : goto done;
67 0 : }
68 :
69 0 : fd_vinyl_bstream_block_t _ftr[1];
70 0 : fd_vinyl_bstream_block_t * ftr = _ftr;
71 :
72 0 : if( pair_sz <= FD_VINYL_BSTREAM_BLOCK_SZ ) ftr = block;
73 0 : else fd_vinyl_io_read_imm( io, seq + pair_sz - FD_VINYL_BSTREAM_BLOCK_SZ, ftr, FD_VINYL_BSTREAM_BLOCK_SZ );
74 :
75 0 : char const * _err = fd_vinyl_bstream_pair_test_fast( io_seed, seq, block, ftr );
76 0 : if( FD_UNLIKELY( _err ) ) {
77 0 : FD_LOG_WARNING(( "%016lx: %s", seq, _err ));
78 0 : goto done;
79 0 : }
80 :
81 : /* At this point, we appear to have valid completely written pair.
82 : Extract the pair metadata and determine if this replaces a
83 : version we've already seen. Since this single threaded, we can
84 : use the single threaded optimized meta APIs here. */
85 :
86 0 : fd_vinyl_key_t const * pair_key = &block->phdr.key;
87 :
88 0 : ulong pair_memo = fd_vinyl_key_memo( meta_seed, pair_key );
89 :
90 0 : ulong _ele_idx; /* avoid pointer escape */
91 0 : int err = fd_vinyl_meta_query_fast( ele0, ele_max, pair_key, pair_memo, &_ele_idx );
92 0 : ulong ele_idx = _ele_idx;
93 :
94 0 : if( FD_LIKELY( err==FD_VINYL_ERR_KEY ) ) {
95 :
96 : /* This is the first time we've seen pair key or pair key was
97 : erased in a previous iteration (e.g. we most recently
98 : processed an erase for pair key or we are in a move). If we
99 : have room for pair key, insert it into the meta at ele_idx. */
100 :
101 0 : if( FD_UNLIKELY( pair_cnt>=pair_max ) ) {
102 0 : FD_LOG_WARNING(( "%016lx: increase pair_max", seq ));
103 0 : goto done;
104 0 : }
105 :
106 0 : ele0[ ele_idx ].memo = pair_memo;
107 0 : ele0[ ele_idx ].phdr = block->phdr;
108 0 : ele0[ ele_idx ].seq = seq;
109 0 : ele0[ ele_idx ].line_idx = ULONG_MAX; /* key-val not in cache */
110 :
111 0 : pair_cnt++;
112 :
113 0 : } else if( FD_LIKELY( !err ) ) {
114 :
115 : /* This is a more recent version of a pair we saw previously and
116 : meta element ele_idx currently maps pair key to this previous
117 : version. Mark the old version as garbage to collect in the
118 : future and update the mapping to this version. */
119 :
120 0 : ulong old_pair_ctl = ele0[ ele_idx ].phdr.ctl;
121 :
122 0 : ulong old_pair_val_esz = fd_vinyl_bstream_ctl_sz( old_pair_ctl );
123 :
124 0 : garbage_sz += fd_vinyl_bstream_pair_sz( old_pair_val_esz );
125 :
126 : //ele0[ ele_idx ].memo = pair_memo; /* already current */
127 0 : ele0[ ele_idx ].phdr = block->phdr;
128 0 : ele0[ ele_idx ].seq = seq;
129 : //ele0[ ele_idx ].line_idx = ULONG_MAX; /* already current */
130 :
131 0 : } else {
132 :
133 0 : FD_LOG_WARNING(( "%016lx: corrupt meta", seq ));
134 0 : goto done;
135 :
136 0 : }
137 :
138 0 : seq += pair_sz;
139 0 : break;
140 :
141 0 : }
142 :
143 0 : case FD_VINYL_BSTREAM_CTL_TYPE_DEAD: {
144 :
145 0 : char const * _err = fd_vinyl_bstream_dead_test( io_seed, seq, block );
146 0 : if( FD_UNLIKELY( _err ) ) {
147 0 : FD_LOG_WARNING(( "%016lx: %s", seq, _err ));
148 0 : goto done;
149 0 : }
150 :
151 : /* At this point, we appear to have a valid DEAD block. Look up
152 : the pair it erases. */
153 :
154 0 : ulong pair_val_esz = fd_vinyl_bstream_ctl_sz( block->dead.phdr.ctl );
155 :
156 0 : fd_vinyl_key_t const * pair_key = &block->dead.phdr.key;
157 :
158 0 : ulong pair_memo = fd_vinyl_key_memo( meta_seed, pair_key );
159 :
160 0 : ulong _ele_idx; /* avoid pointer escape */
161 0 : int err = fd_vinyl_meta_query_fast( ele0, ele_max, pair_key, pair_memo, &_ele_idx );
162 0 : ulong ele_idx = _ele_idx;;
163 :
164 0 : if( FD_LIKELY( err==FD_VINYL_ERR_KEY ) ) {
165 :
166 : /* This erases the most recent version of pair key in the
167 : bstream's antiquity or is a redundant erase block (which is
168 : arguably an error but, as we can't tell the difference at
169 : this point, we assume the more likely antiquity case). In
170 : short, there's nothing to do but mark this block as garbage
171 : to collect in the future. */
172 :
173 0 : garbage_sz += FD_VINYL_BSTREAM_BLOCK_SZ;
174 :
175 0 : } else {
176 :
177 : /* This erases the most recent version of pair key we've
178 : processed. Validate the erasure target is correct. If so,
179 : mark this block and that version of pair key as garbage for
180 : future collection and remove pair key from the meta. */
181 :
182 0 : int bad_order = fd_vinyl_seq_ge( ele0[ ele_idx ].seq, seq );
183 0 : int bad_phdr = !!memcmp( &ele0[ ele_idx ].phdr, &block->dead.phdr, sizeof(fd_vinyl_bstream_phdr_t) );
184 :
185 0 : if( FD_UNLIKELY( bad_order | bad_phdr ) ) {
186 0 : FD_LOG_WARNING(( "%016lx: %s", seq, bad_order ? "unordered sequence" : "mismatched dead pair metadata" ));
187 0 : goto done;
188 0 : }
189 :
190 0 : garbage_sz += FD_VINYL_BSTREAM_BLOCK_SZ + fd_vinyl_bstream_pair_sz( pair_val_esz );
191 :
192 0 : fd_vinyl_meta_remove_fast( ele0, ele_max, lock, lock_shift, line, line_cnt, ele_idx );
193 :
194 0 : FD_CRIT( pair_cnt, "corruption detected" );
195 0 : pair_cnt--;
196 :
197 0 : }
198 :
199 0 : seq += FD_VINYL_BSTREAM_BLOCK_SZ;
200 0 : break;
201 :
202 0 : }
203 :
204 0 : case FD_VINYL_BSTREAM_CTL_TYPE_MOVE: {
205 :
206 0 : if( FD_UNLIKELY( 2UL*FD_VINYL_BSTREAM_BLOCK_SZ > (seq_present-seq) ) ) { /* Wrapping safe */
207 0 : FD_LOG_WARNING(( "%016lx: truncated", seq ));
208 0 : goto done;
209 0 : }
210 :
211 0 : fd_vinyl_bstream_block_t dst[1];
212 :
213 0 : fd_vinyl_io_read_imm( io, seq + FD_VINYL_BSTREAM_BLOCK_SZ, dst, FD_VINYL_BSTREAM_BLOCK_SZ );
214 :
215 0 : char const * _err = fd_vinyl_bstream_move_test( io_seed, seq, block, dst );
216 0 : if( FD_UNLIKELY( _err ) ) {
217 0 : FD_LOG_WARNING(( "%016lx: %s", seq, _err ));
218 0 : goto done;
219 0 : }
220 :
221 : /* At this point, we appear to have a valid move. Technically, a
222 : move is an atomic "erase pair src_key if any, erase pair
223 : dst_key if any, insert pair dst_key with the info src_info_old
224 : and val src_val_new" where src_val_new is typically the same as
225 : src_val_old, but, strictly speaking, doesn't have to be.
226 :
227 : We do the "erase pair src_key if any" part of the move here.
228 : The next iteration will handle rest naturally (including doing
229 : more extensive validation on the new pair_dst). Note that if
230 : the next iteration detects the new pair dst is invalid, it will
231 : fail recovery in the middle of the move. So applications
232 : should be very wary of using a partial recovery as such can
233 : break move atomicity. */
234 :
235 0 : ulong src_val_esz = fd_vinyl_bstream_ctl_sz( block->move.src.ctl );
236 0 : fd_vinyl_key_t const * src_key = &block->move.src.key;
237 :
238 0 : ulong src_memo = fd_vinyl_key_memo( meta_seed, src_key );
239 :
240 0 : ulong _ele_idx; /* avoid pointer escape */
241 0 : int err = fd_vinyl_meta_query_fast( ele0, ele_max, src_key, src_memo, &_ele_idx );
242 0 : ulong ele_idx = _ele_idx;
243 :
244 0 : if( FD_LIKELY( err==FD_VINYL_ERR_KEY ) ) {
245 :
246 : /* This move erases the most recent version of pair src_key in
247 : the bstream's antiquity or is a redundant move block (which
248 : is arguably an error but, as we can't tell the difference at
249 : this point, we assume the more likely antiquity case). In
250 : short, there's nothing to do but mark this block as garbage
251 : to collect in the future. */
252 :
253 0 : garbage_sz += FD_VINYL_BSTREAM_BLOCK_SZ;
254 :
255 0 : } else {
256 :
257 : /* This move erases the most recent version of pair src_key
258 : we've processed. Validate the erasure target is correct. If
259 : so, mark this block and this version of pair src_key as
260 : garbage for future collection and remove pair src_key from
261 : the meta. */
262 :
263 0 : int bad_order = fd_vinyl_seq_ge( ele0[ ele_idx ].seq, seq );
264 0 : int bad_cnt = !pair_cnt;
265 0 : int bad_phdr = !!memcmp( &ele0[ ele_idx ].phdr, &block->move.src, sizeof(fd_vinyl_bstream_phdr_t) );
266 :
267 0 : if( FD_UNLIKELY( bad_order | bad_cnt | bad_phdr ) ) {
268 0 : FD_LOG_WARNING(( "%016lx: %s", seq, bad_order ? "unordered sequence" :
269 0 : bad_cnt ? "corrupt meta" :
270 0 : "mismatched move src metadata" ));
271 0 : goto done;
272 0 : }
273 :
274 0 : garbage_sz += FD_VINYL_BSTREAM_BLOCK_SZ + fd_vinyl_bstream_pair_sz( src_val_esz );
275 :
276 0 : fd_vinyl_meta_remove_fast( ele0, ele_max, lock, lock_shift, line, line_cnt, ele_idx );
277 :
278 0 : pair_cnt--;
279 :
280 0 : }
281 :
282 : /* At this point, we've handled the "erase old src if any" part of
283 : the move. The next iteration will handle the "erase old dst if
284 : any" and the "insert new dst" part of the move. We know there
285 : will be a next iteration for a type pair object with the
286 : appropriate mojo because of the checks we've already done. So
287 : moves behave atomically from the point of view of the
288 : application when fully recovered. */
289 :
290 0 : seq += FD_VINYL_BSTREAM_BLOCK_SZ;
291 0 : break;
292 :
293 0 : }
294 :
295 0 : case FD_VINYL_BSTREAM_CTL_TYPE_PART: {
296 :
297 0 : if( FD_UNLIKELY( fd_vinyl_seq_ne( block->part.seq, seq ) ) ) {
298 0 : FD_LOG_WARNING(( "%016lx: unexpected part seq", seq ));
299 0 : goto done;
300 0 : }
301 :
302 0 : char const * _err = fd_vinyl_bstream_part_test( io_seed, seq, block );
303 0 : if( FD_UNLIKELY( _err ) ) {
304 0 : FD_LOG_WARNING(( "%016lx: %s", seq, _err ));
305 0 : goto done;
306 0 : }
307 :
308 0 : garbage_sz += FD_VINYL_BSTREAM_BLOCK_SZ;
309 0 : seq += FD_VINYL_BSTREAM_BLOCK_SZ;
310 0 : break;
311 :
312 0 : }
313 :
314 0 : case FD_VINYL_BSTREAM_CTL_TYPE_ZPAD: {
315 :
316 0 : char const * _err = fd_vinyl_bstream_zpad_test( io_seed, seq, block );
317 0 : if( FD_UNLIKELY( _err ) ) {
318 0 : FD_LOG_WARNING(( "%016lx: %s", seq, _err ));
319 0 : goto done;
320 0 : }
321 :
322 : /* Note: zpad blocks aren't included in garbage_sz because we
323 : don't control when they get created (and thus can't easily
324 : update garbage_sz to account for them when they are created). */
325 :
326 0 : seq += FD_VINYL_BSTREAM_BLOCK_SZ;
327 0 : break;
328 :
329 0 : }
330 :
331 0 : default:
332 0 : FD_LOG_WARNING(( "%016lx: unknown type (%x)", seq, (uint)type ));
333 0 : goto done;
334 0 : }
335 0 : }
336 :
337 0 : done:
338 :
339 : /* At this point, the meta is populated appropriately up to seq.
340 : Update the vinyl state and return. If we did not get to
341 : seq_present, we log a warning. */
342 :
343 0 : vinyl->pair_cnt = pair_cnt;
344 0 : vinyl->garbage_sz = garbage_sz;
345 :
346 0 : if( FD_UNLIKELY( fd_vinyl_seq_ne( seq, seq_present ) ) )
347 0 : FD_LOG_WARNING(( "recovery failed, recovered [%016lx,%016lx)/%lu, unrecovered [%016lx,%016lx)/%lu",
348 0 : seq_past, seq, seq-seq_past, seq, seq_present, seq_present-seq ));
349 :
350 0 : return seq;
351 0 : }
|