Line data Source code
1 : #include "fd_poh.h"
2 : #include "generated/fd_poh_tile_seccomp.h"
3 : #include "fd_poh_tile.h"
4 : #include "../replay/fd_replay_tile.h"
5 : #include "../../disco/tiles.h"
6 :
/* Kinds of input link consumed by this tile.  unprivileged_init
   assigns one of these to every input link based on the link name,
   and returnable_frag dispatches on it. */

#define IN_KIND_REPLAY (0) /* link named "replay_out" */
#define IN_KIND_PACK (1) /* link named "pack_poh" */
#define IN_KIND_BANK (2) /* link named "bank_poh" */
10 :
/* fd_poh_in describes the data region of one input link.  It is filled
   in by unprivileged_init and used by returnable_frag to bounds check
   an incoming frag and to translate its chunk into a local address. */

struct fd_poh_in {
  fd_wksp_t * mem;    /* workspace holding the link's dcache */
  ulong       chunk0; /* lowest valid chunk (fd_dcache_compact_chunk0) */
  ulong       wmark;  /* highest valid chunk (fd_dcache_compact_wmark) */
  ulong       mtu;    /* link mtu, the largest frag sz accepted */
};

typedef struct fd_poh_in fd_poh_in_t;
19 :
/* fd_poh_tile is the tile context.  It lives at the start of the tile
   scratch region (see scratch_footprint / unprivileged_init). */

struct fd_poh_tile {
  /* PoH state, set up in unprivileged_init with fd_poh_new /
     fd_poh_join and driven from after_credit and returnable_frag. */
  fd_poh_t poh[1];

  /* There's a race condition ... let's say two banks A and B, bank A
     processes some transactions, then releases the account locks, and
     sends the microblock to PoH to be stamped.  Pack now re-packs the
     same accounts with a new microblock, sends to bank B, bank B
     executes and sends the microblock to PoH, and this all happens fast
     enough that PoH picks the 2nd block to stamp before the 1st.  The
     accounts database changes now are misordered with respect to PoH so
     replay could fail.

     To prevent this race, we order all microblocks and only process
     them in PoH in the order they are produced by pack.  This is a
     little bit over-strict, we just need to ensure that microblocks
     with conflicting accounts execute in order, but this is easiest to
     implement for now.

     expect_pack_idx is the pack_idx of the next pack/bank frag that
     returnable_frag will accept.  Frags carrying a later index are held
     back, and the counter is incremented each time one is accepted. */
  uint expect_pack_idx;

  ulong in_cnt;   /* number of input links (tile->in_cnt) */
  ulong idle_cnt; /* after_credit calls since a frag was last consumed or
                     fd_poh_advance was last called */

  int in_kind[ 64 ];     /* IN_KIND_* of each input link, indexed by in_idx */
  fd_poh_in_t in[ 64 ];  /* data region of each input link, indexed by in_idx */

  fd_poh_out_t shred_out[ 1 ];  /* output link named "poh_shred" */
  fd_poh_out_t replay_out[ 1 ]; /* output link named "poh_replay" */
};

typedef struct fd_poh_tile fd_poh_tile_t;
50 :
51 : FD_FN_CONST static inline ulong
52 0 : scratch_align( void ) {
53 0 : return 128UL;
54 0 : }
55 :
56 : FD_FN_PURE static inline ulong
57 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
58 0 : (void)tile;
59 0 : ulong l = FD_LAYOUT_INIT;
60 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_poh_tile_t), sizeof(fd_poh_tile_t) );
61 0 : return FD_LAYOUT_FINI( l, scratch_align() );
62 0 : }
63 :
64 : static inline void
65 : after_credit( fd_poh_tile_t * ctx,
66 : fd_stem_context_t * stem,
67 : int * opt_poll_in,
68 0 : int * charge_busy ) {
69 0 : ctx->idle_cnt++;
70 0 : if( FD_LIKELY( ctx->idle_cnt>=2UL*ctx->in_cnt || fd_poh_must_tick( ctx->poh ) ) ) {
71 : /* We would like to fully drain input links to the best of our
72 : knowledge, before we spend cycles on hashing. That is, we would
73 : like to assert that all input links have stayed empty since the
74 : last time we polled. Given an arbitrary input link L, the worst
75 : case is when L is at idx 0 in the input link shuffle the last
76 : time we polled a frag from it, but then link L ends up at idx
77 : in_cnt-1 in the subsequent input link shuffle. So strictly
78 : speaking we will need to have observed 2*in_cnt-1 consecutive
79 : empty in links to be able to assert that link L has been empty
80 : since the last time we polled it.
81 :
82 : Except that when we are leader and the hashcnt is right before a
83 : tick boundary, poh must advance to the tick boundary and produce
84 : the tick. Otherwise, a tick will be skipped if a microblock
85 : mixin happens. */
86 0 : fd_poh_advance( ctx->poh, stem, opt_poll_in, charge_busy );
87 0 : ctx->idle_cnt = 0UL;
88 0 : }
89 0 : }
90 :
/* Leader slot handshake between replay, pack and poh:

   1. replay -> (pack, poh): start packing for a slot.
   2. If a slot is already in progress, pack -> poh (abandon_packing)
      for the old slot.
   3. pack is then free to start packing.
   4. If poh still has a slot in progress, it refuses the replay frag
      until it sees abandon_packing.
   5. poh must process pack frags in order.
   6. When poh sees done_packing/abandon_packing, it sends
      poh -> replay saying the bank is now unused. */
99 :
100 : static inline int
101 : returnable_frag( fd_poh_tile_t * ctx,
102 : ulong in_idx,
103 : ulong seq,
104 : ulong sig,
105 : ulong chunk,
106 : ulong sz,
107 : ulong ctl,
108 : ulong tsorig,
109 : ulong tspub,
110 0 : fd_stem_context_t * stem ) {
111 0 : (void)seq;
112 0 : (void)ctl;
113 0 : (void)tsorig;
114 0 : (void)tspub;
115 :
116 : /* TODO: Pack has a workaround for Frankendancer that sequences bank
117 : release to manage lifetimes, but it's not needed in Firedancer so
118 : we just drop it. We shouldn't send it at all in future. */
119 0 : if( FD_UNLIKELY( sig==ULONG_MAX && ctx->in_kind[ in_idx ]==IN_KIND_PACK ) ) {
120 0 : ctx->idle_cnt = 0UL;
121 0 : return 0;
122 0 : }
123 :
124 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) )
125 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
126 :
127 : /* There's a race condition where we might receive microblocks from
128 : banks before we have learned what the leader bank is from replay
129 : (the become_leader message makes it from replay->pack->bank->poh)
130 : before it just makes it from replay->poh. This is rare but
131 : violates invariants in poh, so we simply do not process any
132 : transactions for mixin until we have learned what the leader bank
133 : is. */
134 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_BANK && !fd_poh_have_leader_bank( ctx->poh ) ) ) return 1;
135 :
136 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_REPLAY && fd_poh_have_leader_bank( ctx->poh ) ) ) return 1;
137 : /* If prior leaders skipped, it might happen that replay tells us to
138 : become leader, but poh is still hashing through the skipped slots
139 : and could not yet mixin any microblocks. In this case, we hold
140 : the microblocks and do not mixin them yet until we have hashed
141 : through to the actual leader slot.
142 :
143 : It might actually be allowed by the protocol to mixin earlier, but
144 : that really doesn't seem like a good idea.
145 :
146 : It's fine to block pack/banks on hashing here, because they we are
147 : going to have the wait for the full block to timeout once it starts */
148 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_BANK && fd_poh_hashing_to_leader_slot( ctx->poh ) ) ) return 1;
149 0 : if( FD_LIKELY( ctx->in_kind[ in_idx ]==IN_KIND_BANK || ctx->in_kind[ in_idx ]==IN_KIND_PACK ) ) {
150 0 : uint pack_idx = (uint)fd_disco_bank_sig_pack_idx( sig );
151 0 : if( FD_UNLIKELY( ((int)(pack_idx-ctx->expect_pack_idx))<0L ) ) FD_LOG_ERR(( "received out of order pack_idx %u (expecting %u)", pack_idx, ctx->expect_pack_idx ));
152 0 : if( FD_UNLIKELY( pack_idx!=ctx->expect_pack_idx ) ) return 1;
153 0 : ctx->expect_pack_idx++;
154 0 : }
155 :
156 0 : switch( ctx->in_kind[ in_idx ] ) {
157 0 : case IN_KIND_PACK: {
158 0 : fd_done_packing_t const * done_packing = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
159 0 : fd_poh_done_packing( ctx->poh, done_packing->microblocks_in_slot );
160 0 : break;
161 0 : }
162 0 : case IN_KIND_REPLAY: {
163 0 : if( FD_LIKELY( sig==REPLAY_SIG_BECAME_LEADER ) ) {
164 0 : fd_became_leader_t const * became_leader = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
165 0 : fd_poh_begin_leader( ctx->poh, became_leader->slot, became_leader->hashcnt_per_tick, became_leader->ticks_per_slot, became_leader->tick_duration_ns, became_leader->max_microblocks_in_slot );
166 0 : } else if( sig==REPLAY_SIG_RESET ) {
167 0 : fd_poh_reset_t const * reset = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
168 0 : fd_poh_reset( ctx->poh, stem, reset->timestamp, reset->hashcnt_per_tick, reset->ticks_per_slot, reset->tick_duration_ns, reset->completed_slot, reset->completed_blockhash, reset->next_leader_slot, reset->max_microblocks_in_slot, reset->completed_block_id );
169 0 : }
170 0 : break;
171 0 : }
172 0 : case IN_KIND_BANK: {
173 0 : ulong target_slot = fd_disco_bank_sig_slot( sig );
174 0 : ulong txn_cnt = (sz-sizeof(fd_microblock_trailer_t))/sizeof(fd_txn_p_t);
175 0 : fd_txn_p_t const * txns = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
176 0 : fd_microblock_trailer_t const * trailer = fd_type_pun_const( (uchar const*)txns+sz-sizeof(fd_microblock_trailer_t) );
177 0 : fd_poh1_mixin( ctx->poh, stem, target_slot, trailer->hash, txn_cnt, txns );
178 0 : break;
179 0 : }
180 0 : default: {
181 0 : FD_LOG_ERR(( "unexpected input kind %d", ctx->in_kind[ in_idx ] ));
182 0 : break;
183 0 : }
184 0 : }
185 :
186 0 : ctx->idle_cnt = 0UL;
187 0 : return 0;
188 0 : }
189 :
190 : static inline fd_poh_out_t
191 : out1( fd_topo_t const * topo,
192 : fd_topo_tile_t const * tile,
193 0 : char const * name ) {
194 0 : ulong idx = ULONG_MAX;
195 :
196 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
197 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
198 0 : if( !strcmp( link->name, name ) ) {
199 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
200 0 : idx = i;
201 0 : }
202 0 : }
203 :
204 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had no output link named %s", tile->name, tile->kind_id, name ));
205 :
206 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
207 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
208 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
209 :
210 0 : return (fd_poh_out_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
211 0 : }
212 :
213 : static void
214 : unprivileged_init( fd_topo_t * topo,
215 0 : fd_topo_tile_t * tile ) {
216 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
217 :
218 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
219 0 : fd_poh_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_poh_tile_t ), sizeof( fd_poh_tile_t ) );
220 :
221 0 : ctx->expect_pack_idx = 0UL;
222 :
223 0 : ctx->in_cnt = tile->in_cnt;
224 0 : ctx->idle_cnt = 0UL;
225 :
226 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
227 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
228 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
229 :
230 0 : ctx->in[ i ].mem = link_wksp->wksp;
231 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
232 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
233 0 : ctx->in[ i ].mtu = link->mtu;
234 :
235 0 : if( !strcmp( link->name, "replay_out" ) ) ctx->in_kind[ i ] = IN_KIND_REPLAY;
236 0 : else if( !strcmp( link->name, "pack_poh" ) ) ctx->in_kind[ i ] = IN_KIND_PACK;
237 0 : else if( !strcmp( link->name, "bank_poh" ) ) ctx->in_kind[ i ] = IN_KIND_BANK;
238 0 : else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
239 0 : }
240 :
241 0 : *ctx->shred_out = out1( topo, tile, "poh_shred" );
242 0 : *ctx->replay_out = out1( topo, tile, "poh_replay" );
243 :
244 0 : FD_TEST( fd_poh_join( fd_poh_new( ctx->poh ), ctx->shred_out, ctx->replay_out ) );
245 :
246 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
247 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
248 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
249 0 : }
250 :
251 : static ulong
252 : populate_allowed_seccomp( fd_topo_t const * topo,
253 : fd_topo_tile_t const * tile,
254 : ulong out_cnt,
255 0 : struct sock_filter * out ) {
256 0 : (void)topo;
257 0 : (void)tile;
258 :
259 0 : populate_sock_filter_policy_fd_poh_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
260 0 : return sock_filter_policy_fd_poh_tile_instr_cnt;
261 0 : }
262 :
263 : static ulong
264 : populate_allowed_fds( fd_topo_t const * topo,
265 : fd_topo_tile_t const * tile,
266 : ulong out_fds_cnt,
267 0 : int * out_fds ) {
268 0 : (void)topo;
269 0 : (void)tile;
270 :
271 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
272 :
273 0 : ulong out_cnt = 0UL;
274 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
275 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
276 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
277 0 : return out_cnt;
278 0 : }
279 :
/* Configuration for the fd_stem.c template, which is included right
   after these definitions.  They must stay ahead of that include. */

/* One tick, one microblock, one slot ended */
#define STEM_BURST (3UL)

/* See explanation in fd_pack */
#define STEM_LAZY (128L*3000L)

/* Type and alignment of the context handed to the callbacks below. */
#define STEM_CALLBACK_CONTEXT_TYPE fd_poh_tile_t
#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_poh_tile_t)

/* Callbacks implemented by this tile. */
#define STEM_CALLBACK_AFTER_CREDIT after_credit
#define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
291 :
292 : #include "../../disco/stem/fd_stem.c"
293 :
294 : fd_topo_run_tile_t fd_tile_poh = {
295 : .name = "poh",
296 : .populate_allowed_seccomp = populate_allowed_seccomp,
297 : .populate_allowed_fds = populate_allowed_fds,
298 : .scratch_align = scratch_align,
299 : .scratch_footprint = scratch_footprint,
300 : .privileged_init = NULL,
301 : .unprivileged_init = unprivileged_init,
302 : .run = stem_run,
303 : };
|