Line data Source code
1 : #include "fd_store.h"
2 :
3 : void *
4 0 : fd_store_new( void * mem, ulong lo_wmark_slot ) {
5 0 : if( FD_UNLIKELY( !mem ) ) {
6 0 : FD_LOG_WARNING( ( "NULL mem" ) );
7 0 : return NULL;
8 0 : }
9 :
10 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_store_align() ) ) ) {
11 0 : FD_LOG_WARNING( ( "misaligned mem" ) );
12 0 : return NULL;
13 0 : }
14 :
15 0 : fd_memset( mem, 0, fd_store_footprint() );
16 :
17 0 : fd_store_t * store = (fd_store_t *)mem;
18 0 : store->first_turbine_slot = FD_SLOT_NULL;
19 0 : store->curr_turbine_slot = FD_SLOT_NULL;
20 0 : store->root = FD_SLOT_NULL;
21 0 : fd_repair_backoff_map_new( store->repair_backoff_map );
22 0 : store->pending_slots = fd_pending_slots_new( (uchar *)mem + fd_store_footprint(), lo_wmark_slot );
23 0 : if( FD_UNLIKELY( !store->pending_slots ) ) {
24 0 : return NULL;
25 0 : }
26 :
27 0 : return mem;
28 0 : }
29 :
30 : fd_store_t *
31 0 : fd_store_join( void * store ) {
32 0 : if( FD_UNLIKELY( !store ) ) {
33 0 : FD_LOG_WARNING( ( "NULL store" ) );
34 0 : return NULL;
35 0 : }
36 :
37 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)store, fd_store_align() ) ) ) {
38 0 : FD_LOG_WARNING( ( "misaligned replay" ) );
39 0 : return NULL;
40 0 : }
41 :
42 0 : fd_store_t * store_ = (fd_store_t *)store;
43 0 : fd_repair_backoff_map_join( store_->repair_backoff_map );
44 0 : store_->pending_slots = fd_pending_slots_join( store_->pending_slots );
45 0 : if( FD_UNLIKELY( !store_->pending_slots ) ) {
46 0 : return NULL;
47 0 : }
48 :
49 0 : return store_;
50 0 : }
51 :
52 : void *
53 0 : fd_store_leave( fd_store_t const * store ) {
54 0 : if( FD_UNLIKELY( !store ) ) {
55 0 : FD_LOG_WARNING( ( "NULL store" ) );
56 0 : return NULL;
57 0 : }
58 :
59 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)store, fd_store_align() ) ) ) {
60 0 : FD_LOG_WARNING( ( "misaligned store" ) );
61 0 : return NULL;
62 0 : }
63 :
64 0 : return (void *)store;
65 0 : }
66 :
67 : void *
68 0 : fd_store_delete( void * store ) {
69 0 : if( FD_UNLIKELY( !store ) ) {
70 0 : FD_LOG_WARNING( ( "NULL store" ) );
71 0 : return NULL;
72 0 : }
73 :
74 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)store, fd_store_align() ) ) ) {
75 0 : FD_LOG_WARNING( ( "misaligned store" ) );
76 0 : return NULL;
77 0 : }
78 :
79 0 : return store;
80 0 : }
81 :
82 : void
83 0 : fd_store_expected_shred_version( fd_store_t * store, ulong expected_shred_version ) {
84 0 : store->expected_shred_version = expected_shred_version;
85 0 : }
86 :
87 : int
88 : fd_store_slot_prepare( fd_store_t * store,
89 : ulong slot,
90 : ulong * repair_slot_out,
91 : uchar const ** block_out,
92 0 : ulong * block_sz_out ) {
93 0 : fd_blockstore_start_read( store->blockstore );
94 :
95 0 : ulong re_adds[2];
96 0 : uint re_adds_cnt = 0U;
97 0 : long re_add_delays[2];
98 :
99 0 : *repair_slot_out = 0;
100 0 : int rc = FD_STORE_SLOT_PREPARE_CONTINUE;
101 :
102 0 : fd_block_t * block = fd_blockstore_block_query( store->blockstore, slot );
103 0 : fd_block_map_t * block_map_entry = fd_blockstore_block_map_query( store->blockstore, slot );
104 :
105 :
106 : /* We already executed this block */
107 0 : if( FD_UNLIKELY( block && fd_uchar_extract_bit( block_map_entry->flags, FD_BLOCK_FLAG_REPLAYING ) ) ) {
108 0 : rc = FD_STORE_SLOT_PREPARE_ALREADY_EXECUTED;
109 0 : goto end;
110 0 : }
111 :
112 0 : if( FD_UNLIKELY( block && fd_uchar_extract_bit( block_map_entry->flags, FD_BLOCK_FLAG_PROCESSED ) ) ) {
113 0 : rc = FD_STORE_SLOT_PREPARE_ALREADY_EXECUTED;
114 0 : goto end;
115 0 : }
116 :
117 0 : if( FD_UNLIKELY( !block_map_entry ) ) {
118 : /* I know nothing about this block yet */
119 0 : rc = FD_STORE_SLOT_PREPARE_NEED_REPAIR;
120 0 : *repair_slot_out = slot;
121 0 : re_add_delays[re_adds_cnt] = FD_REPAIR_BACKOFF_TIME;
122 0 : re_adds[re_adds_cnt++] = slot;
123 0 : goto end;
124 0 : }
125 :
126 0 : ulong parent_slot = block_map_entry->parent_slot;
127 0 : fd_block_map_t * parent_block_map_entry = fd_blockstore_block_map_query( store->blockstore, parent_slot );
128 :
129 : /* If the parent slot meta is missing, this block is an orphan and the ancestry needs to be
130 : * repaired before we can replay it. */
131 0 : if( FD_UNLIKELY( !parent_block_map_entry ) ) {
132 0 : rc = FD_STORE_SLOT_PREPARE_NEED_ORPHAN;
133 0 : *repair_slot_out = slot;
134 0 : re_add_delays[re_adds_cnt] = FD_REPAIR_BACKOFF_TIME;
135 0 : re_adds[re_adds_cnt++] = slot;
136 :
137 0 : re_add_delays[re_adds_cnt] = FD_REPAIR_BACKOFF_TIME;
138 0 : re_adds[re_adds_cnt++] = parent_slot;
139 0 : goto end;
140 0 : }
141 :
142 0 : fd_blockstore_start_read( store->blockstore );
143 0 : fd_block_t * parent_block = fd_blockstore_block_query( store->blockstore, parent_slot );
144 0 : fd_blockstore_end_read( store->blockstore );
145 :
146 : /* We have a parent slot meta, and therefore have at least one shred of the parent block, so we
147 : have the ancestry and need to repair that block directly (as opposed to calling repair orphan).
148 : */
149 0 : if( FD_UNLIKELY( !parent_block ) ) {
150 0 : rc = FD_STORE_SLOT_PREPARE_NEED_REPAIR;
151 0 : *repair_slot_out = parent_slot;
152 0 : re_add_delays[re_adds_cnt] = FD_REPAIR_BACKOFF_TIME;
153 0 : re_adds[re_adds_cnt++] = parent_slot;
154 0 : re_add_delays[re_adds_cnt] = FD_REPAIR_BACKOFF_TIME;
155 0 : re_adds[re_adds_cnt++] = slot;
156 :
157 0 : goto end;
158 0 : }
159 :
160 : /* See if the parent is executed yet */
161 0 : if( FD_UNLIKELY( !fd_uchar_extract_bit( parent_block_map_entry->flags, FD_BLOCK_FLAG_PROCESSED ) ) ) {
162 0 : rc = FD_STORE_SLOT_PREPARE_NEED_PARENT_EXEC;
163 : // FD_LOG_WARNING(("NEED PARENT EXEC %lu %lu", slot, parent_slot));
164 0 : if( FD_UNLIKELY( !fd_uchar_extract_bit( parent_block_map_entry->flags, FD_BLOCK_FLAG_REPLAYING ) ) ) {
165 : /* ... but it is not prepared */
166 0 : re_add_delays[re_adds_cnt] = (long)5e6;
167 0 : re_adds[re_adds_cnt++] = slot;
168 0 : }
169 0 : re_add_delays[re_adds_cnt] = (long)5e6;
170 0 : re_adds[re_adds_cnt++] = parent_slot;
171 0 : goto end;
172 0 : }
173 :
174 : /* The parent is executed, but the block is still incomplete. Ask for more shreds. */
175 0 : if( FD_UNLIKELY( !block ) ) {
176 0 : rc = FD_STORE_SLOT_PREPARE_NEED_REPAIR;
177 0 : *repair_slot_out = slot;
178 0 : re_add_delays[re_adds_cnt] = FD_REPAIR_BACKOFF_TIME;
179 0 : re_adds[re_adds_cnt++] = slot;
180 0 : goto end;
181 0 : }
182 :
183 : /* Prepare the replay_slot struct. */
184 0 : *block_out = fd_blockstore_block_data_laddr( store->blockstore, block );
185 0 : *block_sz_out = block->data_sz;
186 :
187 : /* Mark the block as prepared, and thus unsafe to remove. */
188 0 : block_map_entry->flags = fd_uchar_set_bit( block_map_entry->flags, FD_BLOCK_FLAG_REPLAYING );
189 :
190 0 : end:
191 : /* Block data ptr remains valid outside of the rw lock for the lifetime of the block alloc. */
192 0 : fd_blockstore_end_read( store->blockstore );
193 :
194 0 : for (uint i = 0; i < re_adds_cnt; ++i)
195 0 : fd_store_add_pending( store, re_adds[i], re_add_delays[i], 0, 0 );
196 :
197 0 : return rc;
198 0 : }
199 :
200 : int
201 : fd_store_shred_insert( fd_store_t * store,
202 0 : fd_shred_t const * shred ) {
203 :
204 0 : if( FD_UNLIKELY( shred->version != store->expected_shred_version ) ) {
205 0 : FD_LOG_WARNING(( "received shred version %lu instead of %lu", (ulong)shred->version, store->expected_shred_version ));
206 0 : return FD_BLOCKSTORE_OK;
207 0 : }
208 :
209 0 : fd_blockstore_t * blockstore = store->blockstore;
210 :
211 0 : if (shred->slot < blockstore->smr) {
212 0 : return FD_BLOCKSTORE_OK;
213 0 : }
214 0 : uchar shred_type = fd_shred_type( shred->variant );
215 : // FD_LOG_INFO(("is chained: %u", fd_shred_is_chained(shred_type) ));
216 0 : if( shred_type != FD_SHRED_TYPE_LEGACY_DATA
217 0 : && shred_type != FD_SHRED_TYPE_MERKLE_DATA
218 0 : && shred_type != FD_SHRED_TYPE_MERKLE_DATA_CHAINED
219 0 : && shred_type != FD_SHRED_TYPE_MERKLE_DATA_CHAINED_RESIGNED ) {
220 0 : return FD_BLOCKSTORE_OK;
221 0 : }
222 :
223 0 : if( store->root!=FD_SLOT_NULL && shred->slot<store->root ) {
224 0 : FD_LOG_WARNING(( "shred slot is behind root, dropping shred - root: %lu, shred_slot: %lu", store->root, shred->slot ));
225 0 : return FD_BLOCKSTORE_OK;
226 0 : }
227 :
228 0 : fd_blockstore_start_write( blockstore );
229 0 : if( fd_blockstore_block_query( blockstore, shred->slot ) != NULL ) {
230 0 : fd_blockstore_end_write( blockstore );
231 0 : return FD_BLOCKSTORE_OK;
232 0 : }
233 0 : int rc = fd_buf_shred_insert( blockstore, shred );
234 0 : fd_blockstore_end_write( blockstore );
235 :
236 : /* FIXME */
237 0 : if( FD_UNLIKELY( rc < FD_BLOCKSTORE_OK ) ) {
238 0 : FD_LOG_ERR( ( "failed to insert shred. reason: %d", rc ) );
239 0 : } else if ( rc == FD_BLOCKSTORE_OK_SLOT_COMPLETE ) {
240 0 : fd_store_add_pending( store, shred->slot, (long)5e6, 0, 1 );
241 0 : } else {
242 0 : fd_store_add_pending( store, shred->slot, FD_REPAIR_BACKOFF_TIME, 0, 0 );
243 0 : fd_repair_backoff_t * backoff = fd_repair_backoff_map_query( store->repair_backoff_map, shred->slot, NULL );
244 0 : if( FD_LIKELY( backoff==NULL ) ) {
245 : /* new backoff entry */
246 0 : backoff = fd_repair_backoff_map_insert( store->repair_backoff_map, shred->slot );
247 0 : backoff->last_backoff_duration = FD_REPAIR_BACKOFF_TIME;
248 0 : backoff->last_repair_time = store->now;
249 0 : } else if( ( backoff->last_repair_time+backoff->last_backoff_duration )
250 0 : >( store->now + FD_REPAIR_BACKOFF_TIME ) ) {
251 0 : backoff->last_backoff_duration = FD_REPAIR_BACKOFF_TIME;
252 0 : backoff->last_repair_time = store->now;
253 0 : }
254 0 : }
255 0 : return rc;
256 0 : }
257 :
258 : void
259 : fd_store_shred_update_with_shred_from_turbine( fd_store_t * store,
260 0 : fd_shred_t const * shred ) {
261 0 : if( FD_UNLIKELY( store->first_turbine_slot == FD_SLOT_NULL ) ) {
262 0 : FD_LOG_NOTICE(("first turbine slot: %lu", shred->slot));
263 : // ulong slot = shred->slot;
264 : // while ( slot > store->snapshot_slot ) {
265 : // fd_store_add_pending( store, slot, 0 );
266 : // slot -= 10;
267 : // }
268 0 : store->first_turbine_slot = shred->slot;
269 0 : store->curr_turbine_slot = shred->slot;
270 0 : }
271 :
272 0 : store->curr_turbine_slot = fd_ulong_max(shred->slot, store->curr_turbine_slot);
273 0 : }
274 :
275 : void
276 : fd_store_add_pending( fd_store_t * store,
277 : ulong slot,
278 : long delay,
279 : int should_backoff,
280 0 : int reset_backoff ) {
281 0 : (void)should_backoff;
282 0 : (void)reset_backoff;
283 : // fd_repair_backoff_t * backoff = fd_repair_backoff_map_query( store->repair_backoff_map, slot, NULL );
284 : // long existing_when = fd_pending_slots_get( store->pending_slots, slot );
285 : // if( existing_when!=0L && existing_when!=LONG_MAX ) {
286 : // if( !should_backoff && delay > ( existing_when-store->now ) ) {
287 : // return;
288 : // }
289 : // }
290 : // // if( existing_when!=0L && existing_when!=LONG_MAX ) {
291 : // // if( !should_backoff && delay < ( existing_when-store->now ) ) {
292 : // // FD_LOG_WARNING(( "hey! %lu %ld %ld ", slot, delay, ( existing_when-store->now )));
293 : // // } else {
294 : // // FD_LOG_WARNING(( "eep %lu %lu %lu %d %lu", slot, delay/1000000, (existing_when - store->now)/1000000, should_backoff ));
295 : // // return;
296 : // // }
297 : // // }
298 : // if( backoff==NULL ) {
299 : // backoff = fd_repair_backoff_map_insert( store->repair_backoff_map, slot );
300 : // backoff->slot = slot;
301 : // backoff->last_backoff = delay;
302 : // } else if( reset_backoff ) {
303 : // backoff->last_backoff = delay;
304 : // } else if( should_backoff ) {
305 : // ulong backoff->last_backoff + (backoff->last_backoff>>3);
306 : // backoff->last_backoff =
307 : // delay = backoff->last_backoff;
308 : // } else {
309 : // delay = backoff->last_backoff;
310 : // }
311 : // if( should_backoff ) FD_LOG_INFO(("PENDING %lu %d %lu %ld", slot, should_backoff, delay/1000000, (existing_when-store->now)/1000000L));
312 0 : if( store->root!=FD_SLOT_NULL && slot<store->root) {
313 0 : FD_LOG_WARNING(( "slot is older than root, skipping adding slot to pending queue - root: %lu, slot: %lu",
314 0 : store->root, slot ));
315 0 : return;
316 0 : }
317 0 : fd_pending_slots_add( store->pending_slots, slot, store->now + (long)delay );
318 0 : }
319 :
320 : void
321 : fd_store_set_root( fd_store_t * store,
322 0 : ulong root ) {
323 0 : store->root = root;
324 0 : fd_pending_slots_set_lo_wmark( store->pending_slots, root );
325 :
326 : /* remove old roots */
327 0 : for( ulong i = 0; i<fd_repair_backoff_map_slot_cnt(); i++ ) {
328 0 : if( store->repair_backoff_map[ i ].slot <= root ) {
329 0 : fd_repair_backoff_map_remove( store->repair_backoff_map, &store->repair_backoff_map[ i ] );
330 0 : }
331 0 : }
332 0 : }
333 :
334 : ulong
335 : fd_store_slot_repair( fd_store_t * store,
336 : ulong slot,
337 : fd_repair_request_t * out_repair_reqs,
338 0 : ulong out_repair_reqs_sz ) {
339 0 : if( out_repair_reqs_sz==0UL ) {
340 0 : return 0UL;
341 0 : }
342 :
343 0 : fd_repair_backoff_t * backoff = fd_repair_backoff_map_query( store->repair_backoff_map, slot, NULL );
344 0 : if( FD_LIKELY( backoff!=NULL ) ) {
345 0 : if( store->now<( backoff->last_repair_time+backoff->last_backoff_duration ) ) {
346 0 : return 0UL;
347 0 : }
348 0 : } else {
349 : /* new backoff entry */
350 0 : backoff = fd_repair_backoff_map_insert( store->repair_backoff_map, slot );
351 0 : backoff->last_backoff_duration = FD_REPAIR_BACKOFF_TIME;
352 0 : }
353 0 : backoff->last_repair_time = store->now;
354 :
355 0 : ulong repair_req_cnt = 0;
356 0 : fd_blockstore_start_read( store->blockstore );
357 0 : fd_block_map_t * block_map_entry = fd_blockstore_block_map_query( store->blockstore, slot );
358 :
359 0 : if( FD_LIKELY( !block_map_entry ) ) {
360 : /* We haven't received any shreds for this slot yet */
361 :
362 0 : fd_repair_request_t * repair_req = &out_repair_reqs[repair_req_cnt++];
363 0 : repair_req->shred_index = 0;
364 0 : repair_req->slot = slot;
365 0 : repair_req->type = FD_REPAIR_REQ_TYPE_NEED_HIGHEST_WINDOW_INDEX;
366 0 : } else {
367 : /* We've received at least one shred, so fill in what's missing */
368 :
369 0 : uint complete_idx = block_map_entry->complete_idx;
370 :
371 : /* We don't know the last index yet */
372 0 : if( FD_UNLIKELY( complete_idx == UINT_MAX ) ) {
373 0 : complete_idx = block_map_entry->received_idx - 1;
374 0 : fd_repair_request_t * repair_req = &out_repair_reqs[repair_req_cnt++];
375 0 : repair_req->shred_index = complete_idx;
376 0 : repair_req->slot = slot;
377 0 : repair_req->type = FD_REPAIR_REQ_TYPE_NEED_HIGHEST_WINDOW_INDEX;
378 0 : }
379 :
380 0 : if( repair_req_cnt==out_repair_reqs_sz ) {
381 0 : backoff->last_backoff_duration += backoff->last_backoff_duration>>2;
382 0 : FD_LOG_INFO( ( "[repair] MAX need %lu [%u, %u], sent %lu requests (backoff: %ld ms)", slot, block_map_entry->consumed_idx + 1, complete_idx, repair_req_cnt, backoff->last_backoff_duration/(long)1e6 ) );
383 0 : fd_blockstore_end_read( store->blockstore );
384 0 : return repair_req_cnt;
385 0 : }
386 :
387 : /* First make sure we are ready to execute this block soon. Look for an ancestor that was executed. */
388 0 : ulong anc_slot = slot;
389 0 : int good = 0;
390 0 : for( uint i = 0; i < 6; ++i ) {
391 0 : anc_slot = fd_blockstore_parent_slot_query( store->blockstore, anc_slot );
392 0 : fd_block_t * anc_block = fd_blockstore_block_query( store->blockstore, anc_slot );
393 0 : fd_block_map_t * anc_block_map_entry = fd_blockstore_block_map_query( store->blockstore, anc_slot );
394 0 : if( anc_block && fd_uchar_extract_bit( anc_block_map_entry->flags, FD_BLOCK_FLAG_PROCESSED ) ) {
395 0 : good = 1;
396 0 : out_repair_reqs_sz /= (i>>1)+1U; /* Slow roll blocks that are further out */
397 0 : break;
398 0 : }
399 0 : }
400 :
401 0 : if( !good ) {
402 0 : fd_blockstore_end_read( store->blockstore );
403 0 : return repair_req_cnt;
404 0 : }
405 :
406 : /* Fill in what's missing */
407 0 : for( uint i = block_map_entry->consumed_idx + 1; i <= complete_idx; i++ ) {
408 0 : if( FD_UNLIKELY( fd_buf_shred_query( store->blockstore, slot, i ) != NULL) ) continue;
409 :
410 0 : fd_repair_request_t * repair_req = &out_repair_reqs[repair_req_cnt++];
411 0 : repair_req->shred_index = i;
412 0 : repair_req->slot = slot;
413 0 : repair_req->type = FD_REPAIR_REQ_TYPE_NEED_WINDOW_INDEX;
414 :
415 0 : if( repair_req_cnt == out_repair_reqs_sz ) {
416 0 : backoff->last_backoff_duration += backoff->last_backoff_duration>>2;
417 0 : FD_LOG_INFO( ( "[repair] MAX need %lu [%u, %u], sent %lu requests (backoff: %ld ms)", slot, block_map_entry->consumed_idx + 1, complete_idx, repair_req_cnt, backoff->last_backoff_duration/(long)1e6 ) );
418 0 : fd_blockstore_end_read( store->blockstore );
419 0 : return repair_req_cnt;
420 0 : }
421 0 : }
422 0 : if( repair_req_cnt ) {
423 0 : backoff->last_backoff_duration += backoff->last_backoff_duration>>2;
424 0 : FD_LOG_INFO( ( "[repair] need %lu [%u, %u], sent %lu requests (backoff: %ld ms)", slot, block_map_entry->consumed_idx + 1, complete_idx, repair_req_cnt, backoff->last_backoff_duration/(long)1e6 ) );
425 0 : }
426 0 : }
427 :
428 0 : fd_blockstore_end_read( store->blockstore );
429 0 : return repair_req_cnt;
430 0 : }
|