Line data Source code
1 : #include "fd_poh.h"
2 :
3 : /* The PoH implementation is at its core a state machine ...
4 :
5 : +--------+
6 : | UNINIT |
7 : +--------+
8 : |
9 : +---------+ | +---------+
10 : | v v v |
11 : +-------------------+ +----------+ +------------------+
12 : | WAITING_FOR_SLOT |<----| FOLLOWER |----->| WAITING_FOR_BANK |
13 : +-------------------+ +----------+ +------------------+
14 : | ^ |
15 : | | |
16 : | +----------+ |
17 : |------>| LEADER |<--------+
18 : +----------+
19 :
20 : The state machine starts UNINIT, but once a snapshot is loaded it
21 : will transition to follower.
22 :
23 : The state machine is in a resting the state when FOLLOWER, in this
24 : state it knows a `next_leader_slot` and will continually hash to
25 : advance towards that slot. When it reaches the `next_leader_slot`
26 : it will transition to the WAITING_FOR_BANK state, where it waits for
27 : the replay stage to tell it some information relevant to that leader
28 : slot, so that it can start doing mixins and hashing towards the end
29 : of the block. When the block ends, the state transitions back to
30 : follower, even if the next slot is the leader, as we need the replay
31 : stage to tell us about the new leader slot.
32 :
33 : Sometimes it might happen that we have received the bank from replay
34 : stage before we have reached the `next_leader_slot`, in which case
35 : we transition to the WAITING_FOR_SLOT state, where we wait for the
36 : hash count to reach the leader slot.
37 :
38 : At any time, during any state except UNINIT, we can be suddenly
39 : "reset" by the replay tile. Such reset actions may move the reset
40 : slot backwards or forwards, or set it back to something we have
41 : already seen before. BUT, the `next_leader_slot` must always
42 : advance forward.
43 :
44 : If the PoH machine successfully completes a leader slot, by hashing
45 : it until the end, then the a completion message is sent back to
46 : replay with the final blockhash, after which the state machine enters
47 : the follower state once again, and waits for further instructions
48 : from replay. */
49 :
50 0 : #define STATE_UNINIT (0)
51 0 : #define STATE_FOLLOWER (1)
52 0 : #define STATE_WAITING_FOR_BANK (2)
53 0 : #define STATE_WAITING_FOR_SLOT (3)
54 0 : #define STATE_LEADER (4)
55 0 : #define STATE_WAITING_FOR_RESET (5)
56 :
57 : FD_FN_CONST ulong
58 0 : fd_poh_align( void ) {
59 0 : return FD_POH_ALIGN;
60 0 : }
61 :
62 : FD_FN_CONST ulong
63 0 : fd_poh_footprint( void ) {
64 0 : return sizeof(fd_poh_t);
65 0 : }
66 :
67 : void *
68 0 : fd_poh_new( void * shmem ) {
69 0 : fd_poh_t * poh = (fd_poh_t *)shmem;
70 :
71 0 : if( FD_UNLIKELY( !poh ) ) {
72 0 : FD_LOG_WARNING(( "NULL shmem" ));
73 0 : return NULL;
74 0 : }
75 :
76 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)poh, fd_poh_align() ) ) ) {
77 0 : FD_LOG_WARNING(( "misaligned shmem" ));
78 0 : return NULL;
79 0 : }
80 :
81 0 : poh->hashcnt_per_tick = ULONG_MAX;
82 0 : poh->state = STATE_UNINIT;
83 0 : poh->wfs_paused = 0;
84 :
85 0 : FD_COMPILER_MFENCE();
86 0 : FD_VOLATILE( poh->magic ) = FD_POH_MAGIC;
87 0 : FD_COMPILER_MFENCE();
88 :
89 0 : return (void *)poh;
90 0 : }
91 :
92 : fd_poh_t *
93 : fd_poh_join( void * shpoh,
94 : fd_poh_out_t * shred_out,
95 0 : fd_poh_out_t * replay_out ) {
96 0 : if( FD_UNLIKELY( !shpoh ) ) {
97 0 : FD_LOG_WARNING(( "NULL shpoh" ));
98 0 : return NULL;
99 0 : }
100 :
101 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shpoh, fd_poh_align() ) ) ) {
102 0 : FD_LOG_WARNING(( "misaligned shpoh" ));
103 0 : return NULL;
104 0 : }
105 :
106 0 : fd_poh_t * poh = (fd_poh_t *)shpoh;
107 :
108 0 : if( FD_UNLIKELY( poh->magic!=FD_POH_MAGIC ) ) {
109 0 : FD_LOG_WARNING(( "bad magic" ));
110 0 : return NULL;
111 0 : }
112 :
113 0 : *poh->shred_out = *shred_out;
114 0 : *poh->replay_out = *replay_out;
115 :
116 0 : return poh;
117 0 : }
118 :
119 : static void
120 : transition_to_follower( fd_poh_t * poh,
121 : fd_stem_context_t * stem,
122 0 : int completed_leader_slot ) {
123 0 : FD_TEST( poh->state==STATE_LEADER || poh->state==STATE_WAITING_FOR_BANK || poh->state==STATE_WAITING_FOR_SLOT || poh->state==STATE_WAITING_FOR_RESET );
124 :
125 0 : if( FD_LIKELY( completed_leader_slot ) ) FD_TEST( poh->state==STATE_LEADER );
126 :
127 0 : if( FD_LIKELY( poh->state==STATE_LEADER || poh->state==STATE_WAITING_FOR_SLOT ) ) {
128 0 : fd_poh_leader_slot_ended_t * dst = fd_chunk_to_laddr( poh->replay_out->mem, poh->replay_out->chunk );
129 0 : dst->completed = completed_leader_slot;
130 0 : dst->slot = poh->slot-1UL;
131 0 : fd_memcpy( dst->blockhash, poh->hash, 32UL );
132 0 : ulong tspub = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
133 0 : fd_stem_publish( stem, poh->replay_out->idx, 0UL, poh->replay_out->chunk, sizeof(fd_poh_leader_slot_ended_t), 0UL, 0UL, tspub );
134 0 : poh->replay_out->chunk = fd_dcache_compact_next( poh->replay_out->chunk, sizeof(fd_poh_leader_slot_ended_t), poh->replay_out->chunk0, poh->replay_out->wmark );
135 0 : }
136 :
137 0 : poh->state = STATE_FOLLOWER;
138 0 : }
139 :
140 : static void
141 : update_hashes_per_tick( fd_poh_t * poh,
142 0 : ulong hashcnt_per_tick ) {
143 0 : if( FD_UNLIKELY( poh->hashcnt_per_tick!=hashcnt_per_tick ) ) {
144 0 : if( FD_UNLIKELY( poh->hashcnt_per_tick!=ULONG_MAX ) ) {
145 0 : FD_LOG_WARNING(( "hashes per tick changed from %lu to %lu", poh->hashcnt_per_tick, hashcnt_per_tick ));
146 0 : }
147 :
148 : /* Recompute derived information about the clock. */
149 0 : poh->hashcnt_duration_ns = (double)poh->tick_duration_ns/(double)hashcnt_per_tick;
150 0 : poh->hashcnt_per_slot = poh->ticks_per_slot*hashcnt_per_tick;
151 0 : poh->hashcnt_per_tick = hashcnt_per_tick;
152 :
153 : /* Discard any ticks we might have done in the interim. They will
154 : have the wrong number of hashes per tick. We can just catch back
155 : up quickly if not too many slots were skipped and hopefully
156 : publish on time. Note that tick production and verification of
157 : skipped slots is done for the eventual bank that publishes a
158 : slot, for example:
159 :
160 : Reset Slot: 998
161 : Epoch Transition Slot: 1000
162 : Leader Slot: 1002
163 :
164 : In this case, if a feature changing the hashcnt_per_tick is
165 : activated in slot 1000, and we are publishing empty ticks for
166 : slots 998, 999, 1000, and 1001, they should all have the new
167 : hashes_per_tick number of hashes, rather than the older one, or
168 : some combination. */
169 :
170 0 : FD_TEST( poh->last_slot==poh->reset_slot );
171 0 : FD_TEST( !poh->last_hashcnt );
172 0 : poh->slot = poh->reset_slot;
173 0 : poh->hashcnt = 0UL;
174 0 : }
175 0 : }
176 :
177 : void
178 : fd_poh_reset( fd_poh_t * poh,
179 : fd_stem_context_t * stem,
180 : long timestamp, /* The local timestamp when the reset is occurring */
181 : ulong hashcnt_per_tick, /* The hashcnt per tick of the bank that completed */
182 : ulong ticks_per_slot,
183 : ulong tick_duration_ns,
184 : ulong completed_slot, /* The slot that successfully produced a block */
185 : uchar const * completed_blockhash, /* The hash of the last tick in the produced block */
186 : ulong next_leader_slot, /* The next slot where this node will be leader */
187 : ulong max_microblocks_in_slot, /* The maximum number of microblocks that may appear in a slot */
188 0 : uchar const * completed_block_id /* The block id of the completed block */) {
189 0 : memcpy( poh->reset_hash, completed_blockhash, 32UL );
190 0 : memcpy( poh->hash, completed_blockhash, 32UL );
191 0 : memcpy( poh->completed_block_id, completed_block_id, 32UL );
192 0 : poh->slot = completed_slot+1UL;
193 0 : poh->hashcnt = 0UL;
194 0 : poh->last_slot = poh->slot;
195 0 : poh->last_hashcnt = 0UL;
196 0 : poh->reset_slot = poh->slot;
197 0 : poh->next_leader_slot = next_leader_slot;
198 0 : poh->max_microblocks_per_slot = max_microblocks_in_slot;
199 0 : poh->reset_slot_start_ns = timestamp;
200 :
201 0 : if( FD_UNLIKELY( poh->state==STATE_UNINIT ) ) {
202 0 : poh->tick_duration_ns = tick_duration_ns;
203 0 : poh->ticks_per_slot = ticks_per_slot;
204 0 : poh->state = STATE_FOLLOWER;
205 0 : } else {
206 0 : FD_TEST( tick_duration_ns==poh->tick_duration_ns );
207 0 : FD_TEST( ticks_per_slot==poh->ticks_per_slot );
208 0 : }
209 0 : update_hashes_per_tick( poh, hashcnt_per_tick );
210 :
211 : /* When we reset, we need to allow PoH to tick freely again rather
212 : than being constrained. If we are leader after the reset, this
213 : is OK because we won't tick until we get a bank, and the lower
214 : bound will be reset with the value from the bank. */
215 0 : poh->microblocks_lower_bound = poh->max_microblocks_per_slot;
216 :
217 0 : if( FD_UNLIKELY( poh->state!=STATE_FOLLOWER ) ) transition_to_follower( poh, stem, 0 );
218 0 : if( FD_UNLIKELY( poh->slot==poh->next_leader_slot ) ) poh->state = STATE_WAITING_FOR_BANK;
219 :
220 0 : }
221 :
222 : void
223 : fd_poh_begin_leader( fd_poh_t * poh,
224 : ulong slot,
225 : ulong hashcnt_per_tick,
226 : ulong ticks_per_slot,
227 : ulong tick_duration_ns,
228 : ulong max_microblocks_in_slot,
229 0 : long slot_start_ns ) {
230 0 : FD_TEST( poh->state==STATE_FOLLOWER || poh->state==STATE_WAITING_FOR_BANK );
231 0 : FD_TEST( slot==poh->next_leader_slot );
232 :
233 : /* PoH ends the slot once it "ticks" through all of the hashes, but we
234 : only want that to happen if we have received a done packing message
235 : from pack, so we always reserve an empty microblock at the end so
236 : the tick advance will not end the slot without being told. */
237 0 : poh->max_microblocks_per_slot = max_microblocks_in_slot+1UL;
238 :
239 0 : FD_TEST( tick_duration_ns==poh->tick_duration_ns );
240 0 : FD_TEST( ticks_per_slot==poh->ticks_per_slot );
241 0 : update_hashes_per_tick( poh, hashcnt_per_tick );
242 :
243 0 : if( FD_LIKELY( poh->state==STATE_FOLLOWER ) ) poh->state = STATE_WAITING_FOR_SLOT;
244 0 : else poh->state = STATE_LEADER;
245 :
246 0 : poh->microblocks_lower_bound = 0UL;
247 0 : poh->leader_slot_start_ns = slot_start_ns;
248 :
249 0 : FD_LOG_INFO(( "begin_leader(slot=%lu, last_slot=%lu, last_hashcnt=%lu)", slot, poh->last_slot, poh->last_hashcnt ));
250 0 : }
251 :
252 : int
253 0 : fd_poh_have_leader_bank( fd_poh_t const * poh ) {
254 0 : return poh->state==STATE_WAITING_FOR_SLOT || poh->state==STATE_LEADER;
255 0 : }
256 :
257 : int
258 0 : fd_poh_hashing_to_leader_slot( fd_poh_t const * poh ) {
259 0 : int hashing = poh->state==STATE_WAITING_FOR_SLOT || poh->state==STATE_LEADER;
260 0 : return hashing && poh->slot<poh->next_leader_slot;
261 0 : }
262 :
263 : int
264 0 : fd_poh_must_tick( fd_poh_t const * poh ) {
265 0 : return poh->state==STATE_LEADER && (poh->hashcnt%poh->hashcnt_per_tick)==(poh->hashcnt_per_tick-1UL);
266 0 : }
267 :
268 : int
269 0 : fd_poh_must_publish_skipped_tick( fd_poh_t const * poh ) {
270 0 : return poh->state==STATE_LEADER && poh->last_slot<poh->slot;
271 0 : }
272 :
273 : void
274 0 : fd_poh_wfs_done( fd_poh_t * poh ) {
275 0 : poh->wfs_paused = 0;
276 0 : poh->reset_slot_start_ns = fd_log_wallclock();
277 0 : }
278 :
279 : void
280 : fd_poh_update_max_microblocks( fd_poh_t * poh,
281 0 : ulong new_max ) {
282 0 : ulong inflated = new_max + 1UL;
283 :
284 : /* Guaranteed to be monotonically decreasing. */
285 0 : FD_TEST( inflated <= poh->max_microblocks_per_slot );
286 0 : poh->max_microblocks_per_slot = inflated;
287 0 : FD_TEST( poh->max_microblocks_per_slot >= poh->microblocks_lower_bound );
288 0 : }
289 :
290 : void
291 : fd_poh_done_packing( fd_poh_t * poh,
292 0 : ulong microblocks_in_slot ) {
293 0 : FD_TEST( poh->state==STATE_LEADER );
294 0 : FD_LOG_INFO(( "done_packing(slot=%lu,seen_microblocks=%lu,microblocks_in_slot=%lu)",
295 0 : poh->slot,
296 0 : poh->microblocks_lower_bound,
297 0 : microblocks_in_slot ));
298 0 : FD_TEST( poh->microblocks_lower_bound==microblocks_in_slot );
299 0 : FD_TEST( poh->microblocks_lower_bound<=poh->max_microblocks_per_slot );
300 :
301 0 : poh->microblocks_lower_bound += 1UL /* done_packing as a phantom "microblock"*/
302 0 : + (poh->max_microblocks_per_slot-1UL) /* the canonical microblock limit */
303 0 : - microblocks_in_slot /* the actual microblock count */;
304 0 : FD_TEST( poh->microblocks_lower_bound==poh->max_microblocks_per_slot );
305 0 : }
306 :
307 : static void
308 : publish_tick( fd_poh_t * poh,
309 : fd_stem_context_t * stem,
310 : uchar hash[ static 32 ],
311 0 : int is_skipped ) {
312 0 : ulong hashcnt = poh->hashcnt_per_tick*(1UL+(poh->last_hashcnt/poh->hashcnt_per_tick));
313 :
314 0 : uchar * dst = (uchar *)fd_chunk_to_laddr( poh->shred_out->mem, poh->shred_out->chunk );
315 :
316 0 : FD_TEST( poh->last_slot>=poh->reset_slot );
317 0 : fd_entry_batch_meta_t * meta = (fd_entry_batch_meta_t *)dst;
318 0 : if( FD_UNLIKELY( is_skipped ) ) {
319 : /* We are publishing ticks for a skipped slot, the reference tick
320 : and block complete flags should always be zero. */
321 0 : meta->reference_tick = 0UL;
322 0 : meta->block_complete = 0;
323 0 : } else {
324 0 : meta->reference_tick = hashcnt/poh->hashcnt_per_tick;
325 0 : meta->block_complete = hashcnt==poh->hashcnt_per_slot;
326 0 : }
327 :
328 0 : meta->parent_block_id_valid = 1;
329 0 : fd_memcpy( meta->parent_block_id, poh->completed_block_id, 32UL );
330 :
331 0 : ulong slot = fd_ulong_if( meta->block_complete, poh->slot-1UL, poh->slot );
332 0 : meta->parent_offset = 1UL+slot-poh->reset_slot;
333 :
334 0 : FD_TEST( hashcnt>poh->last_hashcnt );
335 0 : ulong hash_delta = hashcnt-poh->last_hashcnt;
336 :
337 0 : dst += sizeof(fd_entry_batch_meta_t);
338 0 : fd_entry_batch_header_t * tick = (fd_entry_batch_header_t *)dst;
339 0 : tick->hashcnt_delta = hash_delta;
340 0 : fd_memcpy( tick->hash, hash, 32UL );
341 0 : tick->txn_cnt = 0UL;
342 :
343 0 : ulong tspub = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
344 0 : ulong sz = sizeof(fd_entry_batch_meta_t)+sizeof(fd_entry_batch_header_t);
345 0 : ulong sig = fd_disco_poh_sig( slot, POH_PKT_TYPE_MICROBLOCK, 0UL );
346 0 : fd_stem_publish( stem, poh->shred_out->idx, sig, poh->shred_out->chunk, sz, 0UL, 0UL, tspub );
347 0 : poh->shred_out->chunk = fd_dcache_compact_next( poh->shred_out->chunk, sz, poh->shred_out->chunk0, poh->shred_out->wmark );
348 :
349 0 : if( FD_UNLIKELY( hashcnt==poh->hashcnt_per_slot ) ) {
350 0 : poh->last_slot++;
351 0 : poh->last_hashcnt = 0UL;
352 0 : } else {
353 0 : poh->last_hashcnt = hashcnt;
354 0 : }
355 0 : }
356 :
357 : void
358 : fd_poh_advance( fd_poh_t * poh,
359 : fd_stem_context_t * stem,
360 : int * opt_poll_in,
361 0 : int * charge_busy ) {
362 0 : if( FD_UNLIKELY( poh->state==STATE_UNINIT || poh->state==STATE_WAITING_FOR_RESET ) ) return;
363 0 : if( FD_UNLIKELY( poh->wfs_paused ) ) return;
364 0 : if( FD_UNLIKELY( poh->state==STATE_WAITING_FOR_BANK ) ) {
365 : /* If we are the leader, but we didn't yet learn what the leader
366 : bank object is from the replay tile, do not do any hashing. */
367 0 : return;
368 0 : }
369 :
370 : /* If we have skipped ticks pending because we skipped some slots to
371 : become leader, register them now one at a time. */
372 0 : if( FD_UNLIKELY( fd_poh_must_publish_skipped_tick( poh ) ) ) {
373 0 : FD_TEST( poh->hashcnt==0UL ); /* Current hashcnt stays 0 until the math below is run at least once. */
374 0 : FD_TEST( !(poh->last_hashcnt%poh->hashcnt_per_tick) ); /* While skipped ticks are being published, last_hashcnt marches forward in increments of hashcnt_per_tick. */
375 0 : ulong publish_hashcnt = poh->last_hashcnt+poh->hashcnt_per_tick;
376 0 : ulong tick_idx = (poh->last_slot*poh->ticks_per_slot+publish_hashcnt/poh->hashcnt_per_tick)%MAX_SKIPPED_TICKS;
377 :
378 0 : publish_tick( poh, stem, poh->skipped_tick_hashes[ tick_idx ], 1 );
379 :
380 : /* If we are catching up now and publishing a bunch of skipped
381 : ticks, we do not want to process any incoming microblocks until
382 : all the skipped ticks have been published out; otherwise we would
383 : intersperse skipped tick messages with microblocks. */
384 0 : *opt_poll_in = 0;
385 0 : *charge_busy = 1;
386 0 : return;
387 0 : }
388 :
389 0 : int low_power_mode = poh->hashcnt_per_tick==1UL;
390 :
391 : /* If we are the leader, always leave enough capacity in the slot so
392 : that we can mixin any potential microblocks still coming from the
393 : pack tile for this slot.
394 :
395 : When not leading (FOLLOWER or WAITING_FOR_SLOT), no microblocks
396 : will be mixed in, so there is nothing to reserve so
397 : restricted_hashcnt does not need to be reduced from hashcnt_per_slot. */
398 0 : ulong max_remaining_microblocks;
399 0 : ulong restricted_hashcnt;
400 0 : if( FD_LIKELY( poh->state==STATE_LEADER ) ) {
401 0 : max_remaining_microblocks = poh->max_microblocks_per_slot - poh->microblocks_lower_bound;
402 :
403 : /* With hashcnt_per_tick hashes per tick, we actually get
404 : hashcnt_per_tick-1 chances to mixin a microblock. For each tick
405 : span that we need to reserve, we also need to reserve the hashcnt
406 : for the tick, hence the +
407 : max_remaining_microblocks/(hashcnt_per_tick-1) rounded up.
408 :
409 : However, if hashcnt_per_tick is 1 because we're in low power mode,
410 : this should probably just be max_remaining_microblocks. */
411 0 : ulong max_remaining_ticks_or_microblocks = max_remaining_microblocks;
412 0 : if( FD_LIKELY( !low_power_mode ) ) max_remaining_ticks_or_microblocks += (max_remaining_microblocks+poh->hashcnt_per_tick-2UL)/(poh->hashcnt_per_tick-1UL);
413 :
414 0 : restricted_hashcnt = fd_ulong_if( poh->hashcnt_per_slot>=max_remaining_ticks_or_microblocks, poh->hashcnt_per_slot-max_remaining_ticks_or_microblocks, 0UL );
415 0 : } else {
416 0 : max_remaining_microblocks = 0UL;
417 0 : restricted_hashcnt = poh->hashcnt_per_slot;
418 0 : }
419 :
420 0 : ulong min_hashcnt = poh->hashcnt;
421 :
422 0 : if( FD_LIKELY( !low_power_mode ) ) {
423 : /* Recall that there are two kinds of events that will get published
424 : to the shredder,
425 :
426 : (a) Ticks. These occur every 62,500 (hashcnt_per_tick) hashcnts,
427 : and there will be 64 (ticks_per_slot) of them in each slot.
428 :
429 : Ticks must not have any transactions mixed into the hash.
430 : This is not strictly needed in theory, but is required by the
431 : current consensus protocol. They get published here in
432 : after_credit.
433 :
434 : (b) Microblocks. These can occur at any other hashcnt, as long
435 : as it is not a tick. Microblocks cannot be empty, and must
436 : have at least one transactions mixed in. These get
437 : published in after_frag.
438 :
439 : If hashcnt_per_tick is 1, then we are in low power mode and the
440 : following does not apply, since we can mix in transactions at any
441 : time.
442 :
443 : In the normal, non-low-power mode, though, we have to be careful
444 : to make sure that we do not publish microblocks on tick
445 : boundaries. To do that, we need to obey two rules:
446 : (i) after_credit must not leave hashcnt one before a tick
447 : boundary
448 : (ii) if after_credit begins one before a tick boundary, it must
449 : advance hashcnt and publish the tick
450 :
451 : There's some interplay between min_hashcnt and restricted_hashcnt
452 : here, and we need to show that there's always a value of
453 : target_hashcnt we can pick such that
454 : min_hashcnt <= target_hashcnt <= restricted_hashcnt.
455 : We'll prove this by induction for current_slot==0 and
456 : is_leader==true, since all other slots should be the same.
457 :
458 : Let m_j and r_j be the min_hashcnt and restricted_hashcnt
459 : (respectively) for the jth call to after_credit in a slot. We
460 : want to show that for all values of j, it's possible to pick a
461 : value h_j, the value of target_hashcnt for the jth call to
462 : after_credit (which is also the value of hashcnt after
463 : after_credit has completed) such that m_j<=h_j<=r_j.
464 :
465 : Additionally, let T be hashcnt_per_tick and N be ticks_per_slot.
466 :
467 : Starting with the base case, j==0. m_j=0, and
468 : r_0 = N*T - max_microblocks_per_slot
469 : - ceil(max_microblocks_per_slot/(T-1)).
470 :
471 : This is monotonic decreasing in max_microblocks_per_slot, so it
472 : achieves its minimum when max_microblocks_per_slot is its
473 : maximum.
474 : r_0 >= N*T - N*(T-1) - ceil( (N*(T-1))/(T-1))
475 : = N*T - N*(T-1)-N = 0.
476 : Thus, m_0 <= r_0, as desired.
477 :
478 :
479 :
480 : Then, for the inductive step, assume there exists h_j such that
481 : m_j<=h_j<=r_j, and we want to show that there exists h_{j+1},
482 : which is the same as showing m_{j+1}<=r_{j+1}.
483 :
484 : Let a_j be 1 if we had a microblock immediately following the jth
485 : call to after_credit, and 0 otherwise. Then hashcnt at the start
486 : of the (j+1)th call to after_frag is h_j+a_j.
487 : Also, set b_{j+1}=1 if we are in the case covered by rule (ii)
488 : above during the (j+1)th call to after_credit, i.e. if
489 : (h_j+a_j)%T==T-1. Thus, m_{j+1} = h_j + a_j + b_{j+1}.
490 :
491 : If we received an additional microblock, then
492 : max_remaining_microblocks goes down by 1, and
493 : max_remaining_ticks_or_microblocks goes down by either 1 or 2,
494 : which means restricted_hashcnt goes up by either 1 or 2. In
495 : particular, it goes up by 2 if the new value of
496 : max_remaining_microblocks (at the start of the (j+1)th call to
497 : after_credit) is congruent to 0 mod T-1. Let b'_{j+1} be 1 if
498 : this condition is met and 0 otherwise. If we receive a
499 : done_packing message, restricted_hashcnt can go up by more, but
500 : we can ignore that case, since it is less restrictive.
501 : Thus, r_{j+1}=r_j+a_j+b'_{j+1}.
502 :
503 : If h_j < r_j (strictly less), then h_j+a_j < r_j+a_j. And thus,
504 : since b_{j+1}<=b'_{j+1}+1, just by virtue of them both being
505 : binary,
506 : h_j + a_j + b_{j+1} < r_j + a_j + b'_{j+1} + 1,
507 : which is the same (for integers) as
508 : h_j + a_j + b_{j+1} <= r_j + a_j + b'_{j+1},
509 : m_{j+1} <= r_{j+1}
510 :
511 : On the other hand, if h_j==r_j, this is easy unless b_{j+1}==1,
512 : which can also only happen if a_j==1. Then (h_j+a_j)%T==T-1,
513 : which means there's an integer k such that
514 :
515 : h_j+a_j==(ticks_per_slot-k)*T-1
516 : h_j ==ticks_per_slot*T - k*(T-1)-1 - k-1
517 : ==ticks_per_slot*T - (k*(T-1)+1) - ceil( (k*(T-1)+1)/(T-1) )
518 :
519 : Since h_j==r_j in this case, and
520 : r_j==(ticks_per_slot*T) - max_remaining_microblocks_j - ceil(max_remaining_microblocks_j/(T-1)),
521 : we can see that the value of max_remaining_microblocks at the
522 : start of the jth call to after_credit is k*(T-1)+1. Again, since
523 : a_j==1, then the value of max_remaining_microblocks at the start
524 : of the j+1th call to after_credit decreases by 1 to k*(T-1),
525 : which means b'_{j+1}=1.
526 :
527 : Thus, h_j + a_j + b_{j+1} == r_j + a_j + b'_{j+1}, so, in
528 : particular, h_{j+1}<=r_{j+1} as desired. */
529 0 : min_hashcnt += (ulong)(min_hashcnt%poh->hashcnt_per_tick == (poh->hashcnt_per_tick-1UL)); /* add b_{j+1}, enforcing rule (ii) */
530 0 : }
531 : /* Now figure out how many hashes are needed to "catch up" the hash
532 : count to the current system clock, and clamp it to the allowed
533 : range. */
534 0 : long now = fd_clock_tile_now( poh->clock );
535 0 : ulong target_hashcnt;
536 0 : if( FD_LIKELY( poh->state==STATE_FOLLOWER ||poh->state==STATE_WAITING_FOR_SLOT ) ) {
537 0 : target_hashcnt = (ulong)((double)(now - poh->reset_slot_start_ns) / poh->hashcnt_duration_ns) - (poh->slot-poh->reset_slot)*poh->hashcnt_per_slot;
538 0 : } else {
539 0 : FD_TEST( poh->state==STATE_LEADER );
540 0 : target_hashcnt = (ulong)((double)(now - poh->leader_slot_start_ns) / poh->hashcnt_duration_ns);
541 0 : }
542 : /* Clamp to [min_hashcnt, restricted_hashcnt] as above */
543 0 : target_hashcnt = fd_ulong_max( fd_ulong_min( target_hashcnt, restricted_hashcnt ), min_hashcnt );
544 :
545 : /* The above proof showed that it was always possible to pick a value
546 : of target_hashcnt, but we still have a lot of freedom in how to
547 : pick it. It simplifies the code a lot if we don't keep going after
548 : a tick in this function. In particular, we want to publish at most
549 : 1 tick in this call, since otherwise we could consume infinite
550 : credits to publish here. The credits are set so that we should
551 : only ever publish one tick during this loop. Also, all the extra
552 : stuff (leader transitions, publishing ticks, etc.) we have to do
553 : happens at tick boundaries, so this lets us consolidate all those
554 : cases.
555 :
556 : Mathematically, since the current value of hashcnt is h_j+a_j, the
557 : next tick (advancing a full tick if we're currently at a tick) is
558 : t_{j+1} = T*(floor( (h_j+a_j)/T )+1). We need to show that if we set
559 : h'_{j+1} = min( h_{j+1}, t_{j+1} ), it is still valid.
560 :
561 : First, h'_{j+1} <= h_{j+1} <= r_{j+1}, so we're okay in that
562 : direction.
563 :
564 : Next, observe that t_{j+1}>=h_j + a_j + 1, and recall that b_{j+1}
565 : is 0 or 1. So then,
566 : t_{j+1} >= h_j+a_j+b_{j+1} = m_{j+1}.
567 :
568 : We know h_{j+1) >= m_{j+1} from before, so then h'_{j+1} >=
569 : m_{j+1}, as desired. */
570 :
571 0 : ulong next_tick_hashcnt = poh->hashcnt_per_tick * (1UL+(poh->hashcnt/poh->hashcnt_per_tick));
572 0 : target_hashcnt = fd_ulong_min( target_hashcnt, next_tick_hashcnt );
573 :
574 : /* We still need to enforce rule (i). We know that min_hashcnt%T !=
575 : T-1 because of rule (ii). That means that if target_hashcnt%T ==
576 : T-1 at this point, target_hashcnt > min_hashcnt (notice the
577 : strict), so target_hashcnt-1 >= min_hashcnt and is thus still a
578 : valid choice for target_hashcnt. */
579 0 : target_hashcnt -= (ulong)( (!low_power_mode) & ((target_hashcnt%poh->hashcnt_per_tick)==(poh->hashcnt_per_tick-1UL)) );
580 :
581 0 : FD_TEST( target_hashcnt >= poh->hashcnt );
582 0 : FD_TEST( target_hashcnt >= min_hashcnt );
583 0 : FD_TEST( target_hashcnt <= restricted_hashcnt );
584 :
585 0 : if( FD_UNLIKELY( poh->hashcnt==target_hashcnt ) ) return; /* Nothing to do, don't publish a tick twice */
586 :
587 0 : *charge_busy = 1;
588 :
589 0 : if( FD_LIKELY( poh->hashcnt<target_hashcnt ) ) {
590 0 : fd_sha256_hash_32_repeated( poh->hash, poh->hash, target_hashcnt-poh->hashcnt );
591 0 : poh->hashcnt = target_hashcnt;
592 0 : }
593 :
594 0 : if( FD_UNLIKELY( poh->hashcnt==poh->hashcnt_per_slot ) ) {
595 0 : poh->slot++;
596 0 : poh->hashcnt = 0UL;
597 0 : }
598 :
599 0 : switch( poh->state ) {
600 0 : case STATE_LEADER: {
601 0 : if( FD_UNLIKELY( !(poh->hashcnt%poh->hashcnt_per_tick) ) ) {
602 : /* We ticked while leader... send an empty microblock (a tick)
603 : to the shred tile. */
604 0 : publish_tick( poh, stem, poh->hash, 0 );
605 0 : }
606 0 : if( FD_UNLIKELY( poh->slot>poh->next_leader_slot ) ) {
607 : /* We ticked while leader and are no longer leader... transition
608 : the state machine. */
609 0 : FD_TEST( !max_remaining_microblocks );
610 0 : FD_LOG_INFO(( "fd_poh_ticked_outof_leader(slot=%lu)", poh->slot-1UL ));
611 0 : transition_to_follower( poh, stem, 1 );
612 0 : poh->state = STATE_WAITING_FOR_RESET;
613 0 : }
614 0 : break;
615 0 : }
616 0 : case STATE_WAITING_FOR_SLOT:
617 0 : case STATE_FOLLOWER: {
618 0 : if( FD_UNLIKELY( !(poh->hashcnt%poh->hashcnt_per_tick ) && poh->next_leader_slot!=ULONG_MAX ) ) {
619 : /* We finished a tick while not leader... save the current hash
620 : so it can be played back into the bank when we become the
621 : leader.
622 :
623 : If next_leader_slot is ULONG_MAX, we have no upcoming leader
624 : slot and these tick hashes will never be published, so we
625 : skip storing them. */
626 0 : ulong tick_idx = (poh->slot*poh->ticks_per_slot+poh->hashcnt/poh->hashcnt_per_tick)%MAX_SKIPPED_TICKS;
627 0 : fd_memcpy( poh->skipped_tick_hashes[ tick_idx ], poh->hash, 32UL );
628 :
629 0 : ulong initial_tick_idx = (poh->last_slot*poh->ticks_per_slot+poh->last_hashcnt/poh->hashcnt_per_tick)%MAX_SKIPPED_TICKS;
630 0 : if( FD_UNLIKELY( tick_idx==initial_tick_idx ) ) FD_LOG_ERR(( "Too many skipped ticks from slot %lu to slot %lu, chain must halt", poh->last_slot, poh->slot ));
631 0 : }
632 :
633 0 : FD_TEST( poh->slot<=poh->next_leader_slot );
634 0 : if( FD_UNLIKELY( poh->slot==poh->next_leader_slot ) ) {
635 : /* We ticked while not leader and are now leader... transition
636 : the state machine. */
637 0 : if( FD_LIKELY( poh->state==STATE_FOLLOWER ) ) poh->state = STATE_WAITING_FOR_BANK;
638 0 : else poh->state = STATE_LEADER;
639 0 : }
640 0 : break;
641 0 : }
642 0 : default: {
643 0 : break;
644 0 : }
645 0 : }
646 0 : }
647 :
648 : static void
649 : publish_microblock( fd_poh_t * poh,
650 : fd_stem_context_t * stem,
651 : ulong slot,
652 : ulong hashcnt_delta,
653 : ulong txn_cnt,
654 0 : fd_txn_p_t const * txns ) {
655 0 : uchar * dst = (uchar *)fd_chunk_to_laddr( poh->shred_out->mem, poh->shred_out->chunk );
656 0 : FD_TEST( slot>=poh->reset_slot );
657 0 : fd_entry_batch_meta_t * meta = (fd_entry_batch_meta_t *)dst;
658 0 : meta->parent_offset = 1UL+slot-poh->reset_slot;
659 0 : meta->reference_tick = (poh->hashcnt/poh->hashcnt_per_tick) % poh->ticks_per_slot;
660 0 : meta->block_complete = !poh->hashcnt;
661 :
662 0 : meta->parent_block_id_valid = 1;
663 0 : fd_memcpy( meta->parent_block_id, poh->completed_block_id, 32UL );
664 :
665 0 : dst += sizeof(fd_entry_batch_meta_t);
666 0 : fd_entry_batch_header_t * header = (fd_entry_batch_header_t *)dst;
667 0 : header->hashcnt_delta = hashcnt_delta;
668 0 : fd_memcpy( header->hash, poh->hash, 32UL );
669 :
670 0 : dst += sizeof(fd_entry_batch_header_t);
671 0 : ulong payload_sz = 0UL;
672 0 : ulong included_txn_cnt = 0UL;
673 0 : for( ulong i=0UL; i<txn_cnt; i++ ) {
674 0 : fd_txn_p_t const * txn = txns + i;
675 0 : if( FD_UNLIKELY( !(txn->flags & FD_TXN_P_FLAGS_EXECUTE_SUCCESS) ) ) continue;
676 :
677 0 : fd_memcpy( dst, txn->payload, txn->payload_sz );
678 0 : payload_sz += txn->payload_sz;
679 0 : dst += txn->payload_sz;
680 0 : included_txn_cnt++;
681 0 : }
682 0 : header->txn_cnt = included_txn_cnt;
683 :
684 : /* We always have credits to publish here, because we have a burst
685 : value of 3 credits, and at most we will publish_tick() once and
686 : then publish_became_leader() once, leaving one credit here to
687 : publish the microblock. */
688 0 : ulong tspub = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
689 0 : ulong sz = sizeof(fd_entry_batch_meta_t)+sizeof(fd_entry_batch_header_t)+payload_sz;
690 0 : ulong new_sig = fd_disco_poh_sig( slot, POH_PKT_TYPE_MICROBLOCK, 0UL );
691 0 : fd_stem_publish( stem, poh->shred_out->idx, new_sig, poh->shred_out->chunk, sz, 0UL, 0UL, tspub );
692 0 : poh->shred_out->chunk = fd_dcache_compact_next( poh->shred_out->chunk, sz, poh->shred_out->chunk0, poh->shred_out->wmark );
693 0 : }
694 :
695 : void
696 : fd_poh1_mixin( fd_poh_t * poh,
697 : fd_stem_context_t * stem,
698 : ulong slot,
699 : uchar const * hash,
700 : ulong txn_cnt,
701 0 : fd_txn_p_t const * txns ) {
702 0 : if( FD_UNLIKELY( slot!=poh->next_leader_slot || slot!=poh->slot ) ) {
703 0 : FD_LOG_ERR(( "packed too early or late slot=%lu, current_slot=%lu", slot, poh->slot ));
704 0 : }
705 0 : if( FD_UNLIKELY( (poh->hashcnt%poh->hashcnt_per_tick)==(poh->hashcnt_per_tick-1UL) ) ) FD_LOG_CRIT(( "a tick will be skipped due to hashcnt %lu hashcnt_per_tick %lu", poh->hashcnt, poh->hashcnt_per_tick ));
706 :
707 0 : FD_TEST( poh->state==STATE_LEADER );
708 0 : FD_TEST( poh->microblocks_lower_bound<poh->max_microblocks_per_slot );
709 0 : poh->microblocks_lower_bound += 1UL;
710 :
711 0 : ulong executed_txn_cnt = 0UL;
712 0 : for( ulong i=0UL; i<txn_cnt; i++ ) {
713 : /* It's important that we check if a transaction is included in the
714 : block with FD_TXN_P_FLAGS_EXECUTE_SUCCESS since
715 : actual_consumed_cus may have a nonzero value for excluded
716 : transactions used for monitoring purposes */
717 0 : if( FD_LIKELY( txns[ i ].flags & FD_TXN_P_FLAGS_EXECUTE_SUCCESS ) ) {
718 0 : executed_txn_cnt++;
719 0 : }
720 0 : }
721 :
722 : /* We don't publish transactions that fail to execute. If all the
723 : transactions failed to execute, the microblock would be empty,
724 : causing agave to think it's a tick and complain. Instead, we just
725 : skip the microblock and don't hash or update the hashcnt. */
726 0 : if( FD_UNLIKELY( !executed_txn_cnt ) ) return;
727 :
728 0 : uchar data[ 64 ];
729 0 : fd_memcpy( data, poh->hash, 32UL );
730 0 : fd_memcpy( data+32UL, hash, 32UL );
731 0 : fd_sha256_hash( data, 64UL, poh->hash );
732 :
733 0 : poh->hashcnt++;
734 0 : FD_TEST( poh->hashcnt>poh->last_hashcnt );
735 0 : ulong hashcnt_delta = poh->hashcnt - poh->last_hashcnt;
736 :
737 : /* The hashing loop above will never leave us exactly one away from
738 : crossing a tick boundary, so this increment will never cause the
739 : current tick (or the slot) to change, except in low power mode
740 : for development, in which case we do need to register the tick
741 : with the leader bank. We don't need to publish the tick since
742 : sending the microblock below is the publishing action. */
743 0 : if( FD_UNLIKELY( !(poh->hashcnt%poh->hashcnt_per_slot ) ) ) {
744 0 : poh->slot++;
745 0 : poh->hashcnt = 0UL;
746 0 : }
747 :
748 0 : poh->last_slot = poh->slot;
749 0 : poh->last_hashcnt = poh->hashcnt;
750 :
751 0 : if( FD_UNLIKELY( !(poh->hashcnt%poh->hashcnt_per_tick ) ) ) {
752 0 : if( FD_UNLIKELY( poh->slot>poh->next_leader_slot ) ) {
753 : /* We ticked while leader and are no longer leader... transition
754 : the state machine. */
755 0 : transition_to_follower( poh, stem, 1 );
756 0 : poh->state = STATE_WAITING_FOR_RESET;
757 0 : }
758 0 : }
759 :
760 0 : publish_microblock( poh, stem, slot, hashcnt_delta, txn_cnt, txns );
761 0 : }
|