Line data Source code
1 : #include "fd_poh.h"
2 : #include "fd_poh_tile.h"
3 : #include "../replay/fd_replay_tile.h"
4 : #include "../../disco/tiles.h"
5 : #include "../../disco/fd_clock_tile.h"
6 : #include "../../discof/fd_startup.h"
7 : #include <time.h>
8 : #include "generated/fd_poh_tile_seccomp.h"
9 :
/* Kinds of input link this tile consumes.  The kind of each input is
   derived from the link name in unprivileged_init. */

#define IN_KIND_REPLAY (0) /* link named "replay_out" */
#define IN_KIND_PACK   (1) /* link named "pack_poh"   */
#define IN_KIND_EXECLE (2) /* link named "execle_poh" */
13 :
14 : struct fd_poh_in {
15 : fd_wksp_t * mem;
16 : ulong chunk0;
17 : ulong wmark;
18 : ulong mtu;
19 : };
20 :
21 : typedef struct fd_poh_in fd_poh_in_t;
22 :
23 : struct fd_poh_tile {
24 : fd_poh_t poh[1];
25 :
26 : /* There's a race condition ... let's say two execles A and B, execle
27 : A processes some transactions, then releases the account locks, and
28 : sends the microblock to PoH to be stamped. Pack now re-packs the
29 : same accounts with a new microblock, sends to execle B, execle B
30 : executes and sends the microblock to PoH, and this all happens fast
31 : enough that PoH picks the 2nd block to stamp before the 1st. The
32 : accounts database changes now are misordered with respect to PoH so
33 : replay could fail.
34 :
35 : To prevent this race, we order all microblocks and only process
36 : them in PoH in the order they are produced by pack. This is a
37 : little bit over-strict, we just need to ensure that microblocks
38 : with conflicting accounts execute in order, but this is easiest to
39 : implement for now. */
40 : uint expect_pack_idx;
41 :
42 : ulong in_cnt;
43 : ulong idle_cnt;
44 :
45 : int in_kind[ 64 ];
46 : fd_poh_in_t in[ 64 ];
47 :
48 : fd_poh_out_t shred_out[ 1 ];
49 : fd_poh_out_t replay_out[ 1 ];
50 : };
51 :
52 : typedef struct fd_poh_tile fd_poh_tile_t;
53 :
54 : FD_FN_CONST static inline ulong
55 0 : scratch_align( void ) {
56 0 : return 128UL;
57 0 : }
58 :
59 : FD_FN_PURE static inline ulong
60 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
61 0 : (void)tile;
62 0 : ulong l = FD_LAYOUT_INIT;
63 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_poh_tile_t), sizeof(fd_poh_tile_t) );
64 0 : return FD_LAYOUT_FINI( l, scratch_align() );
65 0 : }
66 :
67 : static inline void
68 0 : during_housekeeping( fd_poh_tile_t * ctx ) {
69 0 : if( FD_UNLIKELY( fd_clock_tile_recal_due( ctx->poh->clock ) ) ) {
70 0 : fd_clock_tile_recal( ctx->poh->clock );
71 0 : }
72 0 : }
73 :
74 : static inline void
75 : after_credit( fd_poh_tile_t * ctx,
76 : fd_stem_context_t * stem,
77 : int * opt_poll_in,
78 0 : int * charge_busy ) {
79 0 : ctx->idle_cnt++;
80 0 : if( FD_LIKELY( ctx->idle_cnt>=2UL*ctx->in_cnt || fd_poh_must_tick( ctx->poh ) || fd_poh_must_publish_skipped_tick( ctx->poh ) ) ) {
81 : /* We would like to fully drain input links to the best of our
82 : knowledge, before we spend cycles on hashing. That is, we would
83 : like to assert that all input links have stayed empty since the
84 : last time we polled. Given an arbitrary input link L, the worst
85 : case is when L is at idx 0 in the input link shuffle the last
86 : time we polled a frag from it, but then link L ends up at idx
87 : in_cnt-1 in the subsequent input link shuffle. So strictly
88 : speaking we will need to have observed 2*in_cnt-1 consecutive
89 : empty in links to be able to assert that link L has been empty
90 : since the last time we polled it.
91 :
92 : Except that when we are leader and the hashcnt is right before a
93 : tick boundary, poh must advance to the tick boundary and produce
94 : the tick. Otherwise, a tick will be skipped if a microblock
95 : mixin happens. Additionally, when there are pending skipped
96 : ticks to be published, we should do that before processing any
97 : incoming microblocks. */
98 0 : fd_poh_advance( ctx->poh, stem, opt_poll_in, charge_busy );
99 0 : ctx->idle_cnt = 0UL;
100 0 : }
101 0 : }
102 :
/* Sequencing of leader slots between replay, pack and poh:

     1. replay -> (pack, poh) ... start packing for slot
     2. if a slot is already in progress, pack -> poh
        (abandon_packing) for the old slot
     3. pack is then free to start packing
     4. if a poh slot is in progress, poh refuses the replay frag
        until it sees abandon_packing
     5. poh must process pack frags in order
     6. when poh sees done_packing/abandon_packing, poh -> replay
        reports that the execle is unused now */
111 :
112 : static inline int
113 : returnable_frag( fd_poh_tile_t * ctx,
114 : ulong in_idx,
115 : ulong seq,
116 : ulong sig,
117 : ulong chunk,
118 : ulong sz,
119 : ulong ctl,
120 : ulong tsorig,
121 : ulong tspub,
122 0 : fd_stem_context_t * stem ) {
123 0 : (void)seq;
124 0 : (void)ctl;
125 0 : (void)tsorig;
126 0 : (void)tspub;
127 :
128 : /* TODO: Pack has a workaround for Frankendancer that sequences bank
129 : release to manage lifetimes, but it's not needed in Firedancer so
130 : we just drop it. We shouldn't send it at all in future. */
131 0 : if( FD_UNLIKELY( sig==FD_PACK_MSG_DONE_DRAINING && ctx->in_kind[ in_idx ]==IN_KIND_PACK ) ) {
132 0 : ctx->idle_cnt = 0UL;
133 0 : return 0;
134 0 : }
135 :
136 : /* Pack periodically publishes a tighter microblock bound over the
137 : pack_poh link. */
138 0 : if( FD_UNLIKELY( sig==FD_PACK_MSG_REDUCE_MB_BOUND && ctx->in_kind[ in_idx ]==IN_KIND_PACK ) ) {
139 0 : ctx->idle_cnt = 0UL;
140 0 : if( FD_UNLIKELY( !fd_poh_have_leader_bank( ctx->poh ) ) ) return 0; /* must have become leader first */
141 0 : FD_TEST( sz==sizeof(ulong) );
142 0 : ulong const * new_max = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
143 0 : fd_poh_update_max_microblocks( ctx->poh, *new_max );
144 0 : return 0;
145 0 : }
146 :
147 0 : if( FD_UNLIKELY( sig==REPLAY_SIG_WFS_DONE && ctx->in_kind[ in_idx ]==IN_KIND_REPLAY ) ) {
148 0 : fd_poh_wfs_done( ctx->poh );
149 0 : ctx->idle_cnt = 0UL;
150 0 : return 0;
151 0 : }
152 :
153 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) )
154 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
155 :
156 : /* There's a race condition where we might receive microblocks from
157 : execles before we have learned what the leader bank is from replay
158 : (the become_leader message makes it from replay->pack->execle->poh)
159 : before it just makes it from replay->poh. This is rare but
160 : violates invariants in poh, so we simply do not process any
161 : transactions for mixin until we have learned what the leader bank
162 : is. */
163 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_EXECLE && !fd_poh_have_leader_bank( ctx->poh ) ) ) return 1;
164 :
165 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_REPLAY && fd_poh_have_leader_bank( ctx->poh ) ) ) return 1;
166 : /* If prior leaders skipped, it might happen that replay tells us to
167 : become leader, but poh is still hashing through the skipped slots
168 : and could not yet mixin any microblocks. In this case, we hold
169 : the microblocks and do not mixin them yet until we have hashed
170 : through to the actual leader slot.
171 :
172 : It might actually be allowed by the protocol to mixin earlier, but
173 : that really doesn't seem like a good idea.
174 :
175 : It's fine to block pack/execles on hashing here, because they we
176 : are going to have the wait for the full block to timeout once it
177 : starts. */
178 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_EXECLE && fd_poh_hashing_to_leader_slot( ctx->poh ) ) ) return 1;
179 0 : if( FD_LIKELY( ctx->in_kind[ in_idx ]==IN_KIND_EXECLE || ctx->in_kind[ in_idx ]==IN_KIND_PACK ) ) {
180 0 : uint pack_idx = (uint)fd_disco_execle_sig_pack_idx( sig );
181 0 : if( FD_UNLIKELY( ((int)(pack_idx-ctx->expect_pack_idx))<0L ) ) FD_LOG_ERR(( "received out of order pack_idx %u (expecting %u)", pack_idx, ctx->expect_pack_idx ));
182 0 : if( FD_UNLIKELY( pack_idx!=ctx->expect_pack_idx ) ) return 1;
183 0 : ctx->expect_pack_idx++;
184 0 : }
185 :
186 0 : switch( ctx->in_kind[ in_idx ] ) {
187 0 : case IN_KIND_PACK: {
188 0 : fd_done_packing_t const * done_packing = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
189 0 : fd_poh_done_packing( ctx->poh, done_packing->microblocks_in_slot );
190 0 : break;
191 0 : }
192 0 : case IN_KIND_REPLAY: {
193 0 : if( FD_LIKELY( sig==REPLAY_SIG_BECAME_LEADER ) ) {
194 0 : fd_became_leader_t const * became_leader = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
195 0 : fd_poh_begin_leader( ctx->poh, became_leader->slot, became_leader->hashcnt_per_tick, became_leader->ticks_per_slot, became_leader->tick_duration_ns, became_leader->max_microblocks_in_slot, became_leader->slot_start_ns );
196 0 : } else if( sig==REPLAY_SIG_RESET ) {
197 0 : fd_poh_reset_t const * reset = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
198 0 : fd_poh_reset( ctx->poh, stem, reset->timestamp, reset->hashcnt_per_tick, reset->ticks_per_slot, reset->tick_duration_ns, reset->completed_slot, reset->completed_blockhash, reset->next_leader_slot, reset->max_microblocks_in_slot, reset->completed_block_id );
199 0 : ctx->poh->wfs_paused = reset->wfs_paused;
200 0 : }
201 0 : break;
202 0 : }
203 0 : case IN_KIND_EXECLE: {
204 0 : ulong target_slot = fd_disco_execle_sig_slot( sig );
205 0 : FD_TEST( sz>=sizeof(fd_microblock_trailer_t) && (sz-sizeof(fd_microblock_trailer_t))%sizeof(fd_txn_p_t)==0UL );
206 0 : ulong txn_cnt = (sz-sizeof(fd_microblock_trailer_t))/sizeof(fd_txn_p_t);
207 0 : fd_txn_p_t const * txns = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
208 0 : fd_microblock_trailer_t const * trailer = fd_type_pun_const( (uchar const*)txns+sz-sizeof(fd_microblock_trailer_t) );
209 0 : fd_poh1_mixin( ctx->poh, stem, target_slot, trailer->hash, txn_cnt, txns );
210 0 : break;
211 0 : }
212 0 : default: {
213 0 : FD_LOG_ERR(( "unexpected input kind %d", ctx->in_kind[ in_idx ] ));
214 0 : break;
215 0 : }
216 0 : }
217 :
218 0 : ctx->idle_cnt = 0UL;
219 0 : return 0;
220 0 : }
221 :
222 : static inline fd_poh_out_t
223 : out1( fd_topo_t const * topo,
224 : fd_topo_tile_t const * tile,
225 0 : char const * name ) {
226 0 : ulong idx = ULONG_MAX;
227 :
228 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
229 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
230 0 : if( !strcmp( link->name, name ) ) {
231 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
232 0 : idx = i;
233 0 : }
234 0 : }
235 :
236 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had no output link named %s", tile->name, tile->kind_id, name ));
237 :
238 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
239 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
240 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
241 :
242 0 : return (fd_poh_out_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
243 0 : }
244 :
245 : static void
246 : unprivileged_init( fd_topo_t const * topo,
247 0 : fd_topo_tile_t const * tile ) {
248 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
249 :
250 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
251 0 : fd_poh_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_poh_tile_t ), sizeof( fd_poh_tile_t ) );
252 :
253 0 : ctx->expect_pack_idx = 0UL;
254 :
255 0 : ctx->in_cnt = tile->in_cnt;
256 0 : ctx->idle_cnt = 0UL;
257 :
258 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
259 0 : fd_topo_link_t const * link = &topo->links[ tile->in_link_id[ i ] ];
260 0 : fd_topo_wksp_t const * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
261 :
262 0 : ctx->in[ i ].mem = link_wksp->wksp;
263 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
264 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
265 0 : ctx->in[ i ].mtu = link->mtu;
266 :
267 0 : if( !strcmp( link->name, "replay_out" ) ) ctx->in_kind[ i ] = IN_KIND_REPLAY;
268 0 : else if( !strcmp( link->name, "pack_poh" ) ) ctx->in_kind[ i ] = IN_KIND_PACK;
269 0 : else if( !strcmp( link->name, "execle_poh" ) ) ctx->in_kind[ i ] = IN_KIND_EXECLE;
270 0 : else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
271 0 : }
272 :
273 0 : *ctx->shred_out = out1( topo, tile, "poh_shred" );
274 0 : *ctx->replay_out = out1( topo, tile, "poh_replay" );
275 :
276 0 : FD_TEST( fd_poh_join( fd_poh_new( ctx->poh ), ctx->shred_out, ctx->replay_out ) );
277 :
278 0 : fd_clock_tile_init( ctx->poh->clock );
279 :
280 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
281 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
282 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
283 :
284 0 : fd_sleep_until_replay_started( topo );
285 0 : }
286 :
287 : static ulong
288 : populate_allowed_seccomp( fd_topo_t const * topo,
289 : fd_topo_tile_t const * tile,
290 : ulong out_cnt,
291 0 : struct sock_filter * out ) {
292 0 : (void)topo;
293 0 : (void)tile;
294 :
295 0 : populate_sock_filter_policy_fd_poh_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
296 0 : return sock_filter_policy_fd_poh_tile_instr_cnt;
297 0 : }
298 :
299 : static ulong
300 : populate_allowed_fds( fd_topo_t const * topo,
301 : fd_topo_tile_t const * tile,
302 : ulong out_fds_cnt,
303 0 : int * out_fds ) {
304 0 : (void)topo;
305 0 : (void)tile;
306 :
307 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
308 :
309 0 : ulong out_cnt = 0UL;
310 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
311 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
312 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
313 0 : return out_cnt;
314 0 : }
315 :
/* Configuration of the stem run loop template included below. */

/* One tick plus one microblock. */
#define STEM_BURST (2UL)

/* See the explanation in fd_pack. */
#define STEM_LAZY  (128L*3000L)

/* Context type handed to each callback, and its alignment. */
#define STEM_CALLBACK_CONTEXT_TYPE  fd_poh_tile_t
#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_poh_tile_t)

/* Callbacks this tile provides. */
#define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
#define STEM_CALLBACK_AFTER_CREDIT        after_credit
#define STEM_CALLBACK_RETURNABLE_FRAG     returnable_frag
328 :
329 : #include "../../disco/stem/fd_stem.c"
330 :
331 : fd_topo_run_tile_t fd_tile_poh = {
332 : .name = "poh",
333 : .populate_allowed_seccomp = populate_allowed_seccomp,
334 : .populate_allowed_fds = populate_allowed_fds,
335 : .scratch_align = scratch_align,
336 : .scratch_footprint = scratch_footprint,
337 : .privileged_init = NULL,
338 : .unprivileged_init = unprivileged_init,
339 : .run = stem_run,
340 : };
|