Line data Source code
1 :
2 : #include "../fd_txn_m.h"
3 : #include "generated/fd_dedup_tile_seccomp.h"
4 :
5 : #include "../topo/fd_topo.h"
6 : #include "../metrics/fd_metrics.h"
7 :
8 : #include "../../discof/replay/fd_replay_tile.h"
9 :
10 : /* fd_dedup provides services to deduplicate multiple streams of input
11 : fragments and present them to a mix of reliable and unreliable
12 : consumers as though they were generated by a single multi-stream
13 : producer. */
14 :
/* IN_KIND_* classify each input link of the dedup tile.  The kind of
   an input is selected from its link name in unprivileged_init and
   decides how during_frag / after_frag treat frags from that input. */

/* "gossip_dedup" link: payload is copied to the output dcache in
   during_frag and parsed in after_frag. */
#define IN_KIND_GOSSIP (0UL)
/* "verify_dedup" link: frag is copied to the output dcache as is. */
#define IN_KIND_VERIFY (1UL)
/* "executed_txn" link: frag is a bare signature that is only inserted
   into the tcache (Frankendancer-only). */
#define IN_KIND_EXECUTED_TXN (2UL)
/* "replay_out" link: signatures of committable executed transactions
   are inserted into the tcache, nothing is forwarded. */
#define IN_KIND_REPLAY (3UL)
19 :
/* fd_dedup_in_ctx_t is a context object for each in (producer) mcache
   connected to the dedup tile.  It is filled in once per input link by
   unprivileged_init and is used by during_frag to bounds check every
   incoming frag before touching its data. */

typedef struct {
  fd_wksp_t * mem;    /* workspace that holds the input link's dcache, base for chunk to address translation */
  ulong chunk0;       /* == fd_dcache_compact_chunk0( mem, dcache ), lowest chunk accepted from this input */
  ulong wmark;        /* == fd_dcache_compact_wmark( mem, dcache, mtu ), highest chunk accepted from this input */
  ulong mtu;          /* link mtu, frags with a larger sz are rejected */
} fd_dedup_in_ctx_t;
29 :
/* fd_dedup_ctx_t is the context object provided to callbacks from
   fd_stem, and contains all state needed to progress the tile.  It
   lives at the start of the tile's scratch region, followed by the
   tcache (see scratch_footprint). */

typedef struct {
  ulong tcache_depth; /* == fd_tcache_depth( tcache ), depth of this dedup's tcache (const) */
  ulong tcache_map_cnt; /* == fd_tcache_map_cnt( tcache ), number of slots to use for tcache map (const) */
  ulong * tcache_sync; /* == fd_tcache_oldest_laddr( tcache ), local join to the oldest key in the tcache */
  ulong * tcache_ring; /* == fd_tcache_ring_laddr( tcache ) */
  ulong * tcache_map; /* == fd_tcache_map_laddr( tcache ) */

  /* Per input link state, indexed by in_idx.  in_kind holds one of the
     IN_KIND_* values. */
  ulong in_kind[ 64UL ];
  fd_dedup_in_ctx_t in[ 64UL ];

  /* State of the bundle currently being received.  A transaction
     carrying a non-zero bundle id different from bundle_id starts a
     new bundle and resets this state. */
  int bundle_failed; /* set once a duplicate was found in the bundle, remaining bundle txns are then dropped */
  ulong bundle_id; /* id of the bundle being tracked, 0UL at startup */
  ulong bundle_idx; /* number of txns of the current bundle that reached the duplicate check */
  uchar bundle_signatures[ 4UL ][ 64UL ]; /* signatures of the earlier txns of the current bundle */

  /* Output dcache of out link 0.  out_chunk is the chunk the next
     frag is staged into and published from. */
  fd_wksp_t * out_mem;
  ulong out_chunk0;
  ulong out_wmark;
  ulong out_chunk;

  ulong hashmap_seed; /* seed for fd_hash of signatures, drawn from fd_rng_secure in privileged_init */

  /* Tile local counters, indexed by
     FD_METRICS_ENUM_DEDUP_TILE_RESULT_V_*_IDX and flushed by
     metrics_write. */
  struct {
    ulong dedup_tile_result[ FD_METRICS_ENUM_DEDUP_TILE_RESULT_CNT ];
  } metrics;
} fd_dedup_ctx_t;
59 :
60 : FD_FN_CONST static inline ulong
61 0 : scratch_align( void ) {
62 0 : return alignof( fd_dedup_ctx_t );
63 0 : }
64 :
65 : FD_FN_PURE static inline ulong
66 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
67 0 : ulong l = FD_LAYOUT_INIT;
68 0 : l = FD_LAYOUT_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
69 0 : l = FD_LAYOUT_APPEND( l, fd_tcache_align(), fd_tcache_footprint( tile->dedup.tcache_depth, 0UL ) );
70 0 : return FD_LAYOUT_FINI( l, scratch_align() );
71 0 : }
72 :
/* metrics_write flushes the tile local result counters accumulated in
   ctx->metrics.dedup_tile_result (one counter per
   FD_METRICS_ENUM_DEDUP_TILE_RESULT value) into the DEDUP
   TRANSACTION_RESULT metric.  It is registered with fd_stem as
   STEM_CALLBACK_METRICS_WRITE. */

static inline void
metrics_write( fd_dedup_ctx_t * ctx ) {
  FD_MCNT_ENUM_COPY( DEDUP, TRANSACTION_RESULT, ctx->metrics.dedup_tile_result );
}
77 :
78 : /* during_frag is called between pairs for sequence number checks, as
79 : we are reading incoming frags. We don't actually need to copy the
80 : fragment here, flow control prevents it getting overrun, and
81 : downstream consumers could reuse the same chunk and workspace to
82 : improve performance.
83 :
84 : The bounds checking and copying here are defensive measures,
85 :
86 : * In a functioning system, the bounds checking should never fail,
87 : but we want to prevent an attacker with code execution on a producer
88 : tile from trivially being able to jump to a consumer tile with
89 : out of bounds chunks.
90 :
91 : * For security reasons, we have chosen to isolate all workspaces from
92 : one another, so for example, if the QUIC tile is compromised with
93 : RCE, it cannot wait until the sigverify tile has verified a transaction,
94 : and then overwrite the transaction while it's being processed by the
95 : banking stage. */
96 :
97 : static inline void
98 : during_frag( fd_dedup_ctx_t * ctx,
99 : ulong in_idx,
100 : ulong seq FD_PARAM_UNUSED,
101 : ulong sig,
102 : ulong chunk,
103 : ulong sz,
104 0 : ulong ctl FD_PARAM_UNUSED ) {
105 :
106 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) )
107 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
108 :
109 0 : uchar * src = (uchar *)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
110 0 : uchar * dst = (uchar *)fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
111 :
112 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP ) ) {
113 0 : if( FD_UNLIKELY( sz>FD_TPU_RAW_MTU ) ) FD_LOG_ERR(( "received a gossip transaction that was too large" ));
114 0 : fd_memcpy( dst, src, sz );
115 :
116 0 : fd_txn_m_t const * txnm = (fd_txn_m_t const *)dst;
117 0 : if( FD_UNLIKELY( txnm->payload_sz>FD_TPU_MTU ) ) {
118 0 : FD_LOG_ERR(( "vote txn payload size %hu exceeds max %lu", txnm->payload_sz, FD_TPU_MTU ));
119 0 : }
120 0 : } else if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_REPLAY ) ) {
121 0 : if( FD_LIKELY( sig==REPLAY_SIG_TXN_EXECUTED ) ) {
122 0 : fd_replay_txn_executed_t * txn_executed = fd_type_pun( src );
123 0 : if( FD_UNLIKELY( !txn_executed->is_committable ) ) return;
124 0 : ulong ha_dedup_tag = fd_hash( ctx->hashmap_seed, fd_txn_get_signatures( TXN(txn_executed->txn), txn_executed->txn->payload ), FD_TXN_SIGNATURE_SZ );
125 0 : int _is_dup;
126 0 : FD_TCACHE_INSERT( _is_dup, *ctx->tcache_sync, ctx->tcache_ring, ctx->tcache_depth, ctx->tcache_map, ctx->tcache_map_cnt, ha_dedup_tag );
127 0 : (void)_is_dup;
128 0 : }
129 0 : } else if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_EXECUTED_TXN ) ) { /* Frankendancer-only */
130 0 : if( FD_UNLIKELY( sz!=FD_TXN_SIGNATURE_SZ ) ) FD_LOG_ERR(( "received an executed transaction signature message with the wrong size %lu", sz ));
131 : /* Executed txns just have their signature inserted into the tcache
132 : so we can dedup them easily. */
133 0 : ulong ha_dedup_tag = fd_hash( ctx->hashmap_seed, src, FD_TXN_SIGNATURE_SZ );
134 0 : int _is_dup;
135 0 : FD_TCACHE_INSERT( _is_dup, *ctx->tcache_sync, ctx->tcache_ring, ctx->tcache_depth, ctx->tcache_map, ctx->tcache_map_cnt, ha_dedup_tag );
136 0 : (void)_is_dup;
137 0 : } else {
138 0 : fd_memcpy( dst, src, sz );
139 0 : }
140 0 : }
141 :
142 : /* After the transaction has been fully received, and we know we were
143 : not overrun while reading it, check if it's a duplicate of a prior
144 : transaction.
145 :
146 : If the transaction came in from the gossip link, then it hasn't been
147 : parsed by us. So parse it here if necessary. */
148 :
149 : static inline void
150 : after_frag( fd_dedup_ctx_t * ctx,
151 : ulong in_idx,
152 : ulong seq,
153 : ulong sig,
154 : ulong sz,
155 : ulong tsorig,
156 : ulong _tspub,
157 0 : fd_stem_context_t * stem ) {
158 0 : (void)seq;
159 0 : (void)sig;
160 0 : (void)sz;
161 0 : (void)_tspub;
162 :
163 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_EXECUTED_TXN ) ) return;
164 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_REPLAY ) ) return;
165 :
166 0 : fd_txn_m_t * txnm = (fd_txn_m_t *)fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
167 0 : FD_TEST( txnm->payload_sz<=FD_TPU_MTU );
168 0 : fd_txn_t * txn = fd_txn_m_txn_t( txnm );
169 :
170 0 : if( FD_UNLIKELY( txnm->block_engine.bundle_id && (txnm->block_engine.bundle_id!=ctx->bundle_id) ) ) {
171 0 : ctx->bundle_failed = 0;
172 0 : ctx->bundle_id = txnm->block_engine.bundle_id;
173 0 : ctx->bundle_idx = 0UL;
174 0 : }
175 :
176 0 : if( FD_UNLIKELY( txnm->block_engine.bundle_id && ctx->bundle_failed ) ) {
177 0 : ctx->metrics.dedup_tile_result[ FD_METRICS_ENUM_DEDUP_TILE_RESULT_V_BUNDLE_PEER_FAILURE_IDX ]++;
178 0 : return;
179 0 : }
180 :
181 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP ) ) {
182 : /* Transactions coming in from these links are not parsed.
183 :
184 : We'll need to parse it so it's ready for downstream consumers.
185 : Equally importantly, we need to parse to extract the signature
186 : for dedup. Just parse it right into the output dcache. */
187 0 : txnm->txn_t_sz = (ushort)fd_txn_parse( fd_txn_m_payload( txnm ), txnm->payload_sz, txn, NULL );
188 0 : if( FD_UNLIKELY( !txnm->txn_t_sz ) ) FD_LOG_ERR(( "fd_txn_parse failed for vote transactions that should have been sigverified" ));
189 :
190 0 : FD_MCNT_INC( DEDUP, GOSSIPED_VOTES_RECEIVED, 1UL );
191 0 : }
192 :
193 0 : int is_dup = 0;
194 0 : if( FD_LIKELY( !txnm->block_engine.bundle_id ) ) {
195 : /* Compute fd_hash(signature) for dedup. */
196 0 : ulong ha_dedup_tag = fd_hash( ctx->hashmap_seed, fd_txn_m_payload( txnm )+txn->signature_off, 64UL );
197 :
198 0 : FD_TCACHE_INSERT( is_dup, *ctx->tcache_sync, ctx->tcache_ring, ctx->tcache_depth, ctx->tcache_map, ctx->tcache_map_cnt, ha_dedup_tag );
199 0 : } else {
200 : /* Make sure bundles don't contain a duplicate transaction inside
201 : the bundle, which would not be valid. */
202 :
203 0 : if( FD_UNLIKELY( ctx->bundle_idx>4UL ) ) FD_LOG_ERR(( "bundle_idx %lu > 4", ctx->bundle_idx ));
204 :
205 0 : for( ulong i=0UL; i<ctx->bundle_idx; i++ ) {
206 0 : if( !memcmp( ctx->bundle_signatures[ i ], fd_txn_m_payload( txnm )+txn->signature_off, 64UL ) ) {
207 0 : is_dup = 1;
208 0 : break;
209 0 : }
210 0 : }
211 :
212 0 : if( FD_UNLIKELY( ctx->bundle_idx==4UL ) ) ctx->bundle_idx++;
213 0 : else fd_memcpy( ctx->bundle_signatures[ ctx->bundle_idx++ ], fd_txn_m_payload( txnm )+txn->signature_off, 64UL );
214 0 : }
215 :
216 0 : if( FD_LIKELY( is_dup ) ) {
217 0 : if( FD_UNLIKELY( txnm->block_engine.bundle_id ) ) ctx->bundle_failed = 1;
218 :
219 0 : ctx->metrics.dedup_tile_result[ FD_METRICS_ENUM_DEDUP_TILE_RESULT_V_DEDUP_FAILURE_IDX ]++;
220 0 : } else {
221 0 : ulong realized_sz = fd_txn_m_realized_footprint( txnm, 1, 0 );
222 0 : ulong tspub = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
223 0 : fd_stem_publish( stem, 0UL, 0, ctx->out_chunk, realized_sz, 0UL, tsorig, tspub );
224 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, realized_sz, ctx->out_chunk0, ctx->out_wmark );
225 :
226 0 : ctx->metrics.dedup_tile_result[ FD_METRICS_ENUM_DEDUP_TILE_RESULT_V_SUCCESS_IDX ]++;
227 0 : }
228 0 : }
229 :
230 : static void
231 : privileged_init( fd_topo_t * topo,
232 0 : fd_topo_tile_t * tile ) {
233 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
234 :
235 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
236 0 : fd_dedup_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
237 0 : FD_TEST( fd_rng_secure( &ctx->hashmap_seed, 8U ) );
238 0 : }
239 :
240 : static void
241 : unprivileged_init( fd_topo_t * topo,
242 0 : fd_topo_tile_t * tile ) {
243 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
244 :
245 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
246 0 : fd_dedup_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
247 0 : fd_tcache_t * tcache = fd_tcache_join( fd_tcache_new( FD_SCRATCH_ALLOC_APPEND( l, fd_tcache_align(), fd_tcache_footprint( tile->dedup.tcache_depth, 0) ), tile->dedup.tcache_depth, 0 ) );
248 0 : if( FD_UNLIKELY( !tcache ) ) FD_LOG_ERR(( "fd_tcache_new failed" ));
249 :
250 0 : ctx->bundle_failed = 0;
251 0 : ctx->bundle_id = 0UL;
252 0 : ctx->bundle_idx = 0UL;
253 :
254 0 : memset( &ctx->metrics, 0, sizeof( ctx->metrics ) );
255 :
256 0 : ctx->tcache_depth = fd_tcache_depth ( tcache );
257 0 : ctx->tcache_map_cnt = fd_tcache_map_cnt ( tcache );
258 0 : ctx->tcache_sync = fd_tcache_oldest_laddr( tcache );
259 0 : ctx->tcache_ring = fd_tcache_ring_laddr ( tcache );
260 0 : ctx->tcache_map = fd_tcache_map_laddr ( tcache );
261 :
262 0 : FD_TEST( tile->in_cnt<=sizeof( ctx->in )/sizeof( ctx->in[ 0 ] ) );
263 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
264 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
265 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
266 :
267 0 : ctx->in[i].mem = link_wksp->wksp;
268 0 : ctx->in[i].mtu = link->mtu;
269 0 : ctx->in[i].chunk0 = fd_dcache_compact_chunk0( ctx->in[i].mem, link->dcache );
270 0 : ctx->in[i].wmark = fd_dcache_compact_wmark ( ctx->in[i].mem, link->dcache, link->mtu );
271 :
272 0 : if( !strcmp( link->name, "gossip_dedup" ) ) {
273 0 : ctx->in_kind[ i ] = IN_KIND_GOSSIP;
274 0 : } else if( !strcmp( link->name, "verify_dedup" ) ) {
275 0 : ctx->in_kind[ i ] = IN_KIND_VERIFY;
276 0 : } else if( !strcmp( link->name, "executed_txn" ) ) {
277 0 : ctx->in_kind[ i ] = IN_KIND_EXECUTED_TXN;
278 0 : } else if( !strcmp( link->name, "replay_out" ) ) {
279 0 : ctx->in_kind[ i ] = IN_KIND_REPLAY;
280 0 : } else {
281 0 : FD_LOG_ERR(( "unexpected link name %s", link->name ));
282 0 : }
283 0 : }
284 :
285 0 : ctx->out_mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ 0 ] ].dcache_obj_id ].wksp_id ].wksp;
286 0 : ctx->out_chunk0 = fd_dcache_compact_chunk0( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache );
287 0 : ctx->out_wmark = fd_dcache_compact_wmark ( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache, topo->links[ tile->out_link_id[ 0 ] ].mtu );
288 0 : ctx->out_chunk = ctx->out_chunk0;
289 :
290 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
291 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
292 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
293 0 : }
294 :
295 : static ulong
296 : populate_allowed_seccomp( fd_topo_t const * topo,
297 : fd_topo_tile_t const * tile,
298 : ulong out_cnt,
299 0 : struct sock_filter * out ) {
300 0 : (void)topo;
301 0 : (void)tile;
302 :
303 0 : populate_sock_filter_policy_fd_dedup_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
304 0 : return sock_filter_policy_fd_dedup_tile_instr_cnt;
305 0 : }
306 :
307 : static ulong
308 : populate_allowed_fds( fd_topo_t const * topo,
309 : fd_topo_tile_t const * tile,
310 : ulong out_fds_cnt,
311 0 : int * out_fds ) {
312 0 : (void)topo;
313 0 : (void)tile;
314 :
315 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
316 :
317 0 : ulong out_cnt = 0UL;
318 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
319 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
320 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
321 0 : return out_cnt;
322 0 : }
323 :
/* Configuration consumed by the fd_stem.c template included right
   after these definitions. */

/* after_frag publishes at most one frag per invocation.
   NOTE(review): presumably STEM_BURST is the per callback publish
   bound stem reserves credits for - confirm against fd_stem.c. */
#define STEM_BURST (1UL)

/* Type and alignment of the context passed to every callback. */
#define STEM_CALLBACK_CONTEXT_TYPE fd_dedup_ctx_t
#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_dedup_ctx_t)

/* Callbacks implemented by this tile. */
#define STEM_CALLBACK_METRICS_WRITE metrics_write
#define STEM_CALLBACK_DURING_FRAG during_frag
#define STEM_CALLBACK_AFTER_FRAG after_frag
332 :
333 : #include "../stem/fd_stem.c"
334 :
/* fd_tile_dedup is the tile descriptor for the "dedup" tile.  It
   bundles the sandbox policy callbacks, the scratch sizing callbacks
   and the two init phases defined in this file, and runs the stem_run
   loop provided by the fd_stem.c include above. */

fd_topo_run_tile_t fd_tile_dedup = {
  .name = "dedup",
  .populate_allowed_seccomp = populate_allowed_seccomp,
  .populate_allowed_fds = populate_allowed_fds,
  .scratch_align = scratch_align,
  .scratch_footprint = scratch_footprint,
  .privileged_init = privileged_init,
  .unprivileged_init = unprivileged_init,
  .run = stem_run,
};
|