Line data Source code
1 : #include "../../../../disco/tiles.h"
2 : #include "fd_verify.h"
3 :
4 : #include "generated/dedup_seccomp.h"
5 :
6 : #include "../../../../disco/metrics/fd_metrics.h"
7 :
8 : #include <linux/unistd.h>
9 :
10 : /* fd_dedup provides services to deduplicate multiple streams of input
11 : fragments and present them to a mix of reliable and unreliable
12 : consumers as though they were generated by a single multi-stream
13 : producer.
14 :
15 : The dedup tile is simply a wrapper around the mux tile, that also
16 : checks the transaction signature field for duplicates and filters
17 : them out. */
18 :
/* Fixed positions of the unparsed in links in the tile's in link list.
   unprivileged_init checks that the topology matches these. */

/* Index of the gossip_dedup in link (Frankendancer and Firedancer). */
#define GOSSIP_IN_IDX (0UL)

/* Index of the voter_dedup in link (Firedancer only). */
#define VOTER_IN_IDX  (1UL)
21 :
22 : /* fd_dedup_in_ctx_t is a context object for each in (producer) mcache
23 : connected to the dedup tile. */
24 :
25 : typedef struct {
26 : fd_wksp_t * mem;
27 : ulong chunk0;
28 : ulong wmark;
29 : } fd_dedup_in_ctx_t;
30 :
31 : /* fd_dedup_ctx_t is the context object provided to callbacks from the
32 : mux tile, and contains all state needed to progress the tile. */
33 :
34 : typedef struct {
35 : ulong tcache_depth; /* == fd_tcache_depth( tcache ), depth of this dedups's tcache (const) */
36 : ulong tcache_map_cnt; /* == fd_tcache_map_cnt( tcache ), number of slots to use for tcache map (const) */
37 : ulong * tcache_sync; /* == fd_tcache_oldest_laddr( tcache ), local join to the oldest key in the tcache */
38 : ulong * tcache_ring;
39 : ulong * tcache_map;
40 :
41 : /* The first unparsed_in_cnt in links do not have the parsed fd_txn_t
42 : in the payload trailer. */
43 : ulong unparsed_in_cnt;
44 : fd_dedup_in_ctx_t in[ 64UL ];
45 :
46 : fd_wksp_t * out_mem;
47 : ulong out_chunk0;
48 : ulong out_wmark;
49 : ulong out_chunk;
50 :
51 : ulong hashmap_seed;
52 :
53 : struct {
54 : ulong dedup_fail_cnt;
55 : } metrics;
56 : } fd_dedup_ctx_t;
57 :
58 : FD_FN_CONST static inline ulong
59 3 : scratch_align( void ) {
60 3 : return alignof( fd_dedup_ctx_t );
61 3 : }
62 :
63 : FD_FN_PURE static inline ulong
64 3 : scratch_footprint( fd_topo_tile_t const * tile ) {
65 3 : (void)tile;
66 3 : ulong l = FD_LAYOUT_INIT;
67 3 : l = FD_LAYOUT_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
68 3 : l = FD_LAYOUT_APPEND( l, fd_tcache_align(), fd_tcache_footprint( tile->dedup.tcache_depth, 0 ) );
69 3 : return FD_LAYOUT_FINI( l, scratch_align() );
70 3 : }
71 :
72 : static inline void
73 0 : metrics_write( fd_dedup_ctx_t * ctx ) {
74 0 : FD_MCNT_SET( DEDUP, TRANSACTION_DEDUP_FAILURE, ctx->metrics.dedup_fail_cnt );
75 0 : }
76 :
77 : /* during_frag is called between pairs for sequence number checks, as
78 : we are reading incoming frags. We don't actually need to copy the
79 : fragment here, flow control prevents it getting overrun, and
80 : downstream consumers could reuse the same chunk and workspace to
81 : improve performance.
82 :
83 : The bounds checking and copying here are defensive measures,
84 :
85 : * In a functioning system, the bounds checking should never fail,
86 : but we want to prevent an attacker with code execution on a producer
87 : tile from trivially being able to jump to a consumer tile with
88 : out of bounds chunks.
89 :
90 : * For security reasons, we have chosen to isolate all workspaces from
91 : one another, so for example, if the QUIC tile is compromised with
92 : RCE, it cannot wait until the sigverify tile has verified a transaction,
93 : and then overwrite the transaction while it's being processed by the
94 : banking stage. */
95 :
96 : static inline void
97 : during_frag( fd_dedup_ctx_t * ctx,
98 : ulong in_idx,
99 : ulong seq,
100 : ulong sig,
101 : ulong chunk,
102 0 : ulong sz ) {
103 0 : (void)seq;
104 0 : (void)sig;
105 :
106 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz > FD_TPU_DCACHE_MTU ) )
107 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
108 :
109 0 : uchar * src = (uchar *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
110 0 : uchar * dst = (uchar *)fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
111 :
112 0 : fd_memcpy( dst, src, sz );
113 0 : }
114 :
115 : /* After the transaction has been fully received, and we know we were
116 : not overrun while reading it, check if it's a duplicate of a prior
117 : transaction.
118 :
119 : If the transaction came in from the gossip link, then it hasn't been
120 : parsed by us. So parse it here if necessary. */
121 :
122 : static inline void
123 : after_frag( fd_dedup_ctx_t * ctx,
124 : ulong in_idx,
125 : ulong seq,
126 : ulong sig,
127 : ulong chunk,
128 : ulong sz,
129 : ulong tsorig,
130 0 : fd_stem_context_t * stem ) {
131 0 : (void)seq;
132 0 : (void)sig;
133 0 : (void)chunk;
134 :
135 : /* Transactions coming from verify tile, already parsed.
136 : We need to reconstruct fd_txn_t * txn, because we need the
137 : tx signature to compute the dedup tag.
138 : To find the position of fd_txn_t * txn, we need the (udp)
139 : payload_sz that's stored as ushort at the end of the
140 : dcache_entry. */
141 0 : uchar * dcache_entry = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
142 0 : ushort * payload_sz_p = (ushort *)(dcache_entry + sz - sizeof(ushort));
143 0 : ulong payload_sz = *payload_sz_p;
144 0 : ulong txn_off = fd_ulong_align_up( payload_sz, 2UL );
145 0 : fd_txn_t * txn = (fd_txn_t *)(dcache_entry + txn_off);
146 :
147 0 : if ( FD_UNLIKELY( in_idx < ctx->unparsed_in_cnt ) ) {
148 : /* Transactions coming in from these links are not parsed.
149 :
150 : We'll need to parse it so it's ready for downstream consumers.
151 : Equally importantly, we need to parse to extract the signature
152 : for dedup. Just parse it right into the output dcache. */
153 :
154 : /* Here, *opt_sz is the size of udp payload, as the tx has not
155 : been parsed yet. Code here is similar to the verify tile. */
156 0 : ulong payload_sz = sz;
157 0 : ulong txn_off = fd_ulong_align_up( payload_sz, 2UL );
158 :
159 : /* Ensure sufficient trailing space for parsing. */
160 0 : if( FD_UNLIKELY( txn_off>FD_TPU_DCACHE_MTU - FD_TXN_MAX_SZ - sizeof(ushort)) ) {
161 0 : FD_LOG_ERR(( "got malformed txn (sz %lu) insufficient space left in dcache for fd_txn_t", payload_sz ));
162 0 : }
163 :
164 0 : txn = (fd_txn_t *)(dcache_entry + txn_off);
165 0 : ulong txn_t_sz = fd_txn_parse( dcache_entry, payload_sz, txn, NULL );
166 0 : if( FD_UNLIKELY( !txn_t_sz ) ) FD_LOG_ERR(( "fd_txn_parse failed for vote transactions that should have been sigverified" ));
167 :
168 : /* Increment on GOSSIP_IN_IDX but not VOTER_IN_IDX */
169 0 : FD_MCNT_INC( DEDUP, GOSSIPED_VOTES_RECEIVED, 1UL - in_idx );
170 :
171 : /* Write payload_sz into trailer.
172 : fd_txn_parse always returns a multiple of 2 so this sz is
173 : correctly aligned. */
174 0 : payload_sz_p = (ushort *)((ulong)txn + txn_t_sz);
175 0 : *payload_sz_p = (ushort)payload_sz;
176 :
177 : /* End of parsed message. */
178 :
179 : /* Paranoia post parsing. */
180 0 : ulong new_sz = ( (ulong)payload_sz_p + sizeof(ushort) ) - (ulong)dcache_entry;
181 0 : if( FD_UNLIKELY( new_sz>FD_TPU_DCACHE_MTU ) ) {
182 0 : FD_LOG_CRIT(( "memory corruption detected (txn_sz=%lu txn_t_sz=%lu)",
183 0 : payload_sz, txn_t_sz ));
184 0 : }
185 :
186 : /* Write new size for mcache. */
187 0 : sz = new_sz;
188 0 : }
189 :
190 : /* Compute fd_hash(signature) for dedup. */
191 0 : ulong ha_dedup_tag = fd_hash( ctx->hashmap_seed, dcache_entry + txn->signature_off, 64UL );
192 :
193 0 : int is_dup;
194 0 : FD_TCACHE_INSERT( is_dup, *ctx->tcache_sync, ctx->tcache_ring, ctx->tcache_depth, ctx->tcache_map, ctx->tcache_map_cnt, ha_dedup_tag );
195 0 : if( FD_LIKELY( is_dup ) ) {
196 0 : ctx->metrics.dedup_fail_cnt++;
197 0 : } else {
198 0 : fd_stem_publish( stem, 0UL, 0, ctx->out_chunk, sz, 0UL, tsorig, 0UL );
199 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sz, ctx->out_chunk0, ctx->out_wmark );
200 0 : }
201 0 : }
202 :
203 : static void
204 : privileged_init( fd_topo_t * topo,
205 0 : fd_topo_tile_t * tile ) {
206 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
207 :
208 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
209 0 : fd_dedup_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
210 0 : FD_TEST( fd_rng_secure( &ctx->hashmap_seed, 8U ) );
211 0 : }
212 :
213 : static void
214 : unprivileged_init( fd_topo_t * topo,
215 0 : fd_topo_tile_t * tile ) {
216 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
217 :
218 : /* Frankendancer has gossip_dedup, verify_dedup+
219 : Firedancer has gossip_dedup, voter_dedup, verify_dedup+ */
220 0 : ulong unparsed_in_cnt = 1;
221 0 : if( FD_UNLIKELY( tile->in_cnt<2UL ) ) {
222 0 : FD_LOG_ERR(( "dedup tile needs at least two input links, got %lu", tile->in_cnt ));
223 0 : } else if( FD_UNLIKELY( strcmp( topo->links[ tile->in_link_id[ GOSSIP_IN_IDX ] ].name, "gossip_dedup" ) ) ) {
224 : /* We have one link for gossip messages... */
225 0 : FD_LOG_ERR(( "dedup tile has unexpected input links %lu %lu %s",
226 0 : tile->in_cnt, GOSSIP_IN_IDX, topo->links[ tile->in_link_id[ GOSSIP_IN_IDX ] ].name ));
227 0 : } else {
228 : /* ...followed by a voter_dedup link if it were the Firedancer topology */
229 0 : ulong voter_dedup_idx = fd_topo_find_tile_in_link( topo, tile, "voter_dedup", 0 );
230 0 : if( voter_dedup_idx!=ULONG_MAX ) {
231 0 : FD_TEST( voter_dedup_idx == VOTER_IN_IDX );
232 0 : unparsed_in_cnt = 2;
233 0 : } else {
234 0 : unparsed_in_cnt = 1;
235 0 : }
236 :
237 : /* ...followed by a sequence of verify_dedup links */
238 0 : for( ulong i=unparsed_in_cnt; i<tile->in_cnt; i++ ) {
239 0 : if( FD_UNLIKELY( strcmp( topo->links[ tile->in_link_id[ i ] ].name, "verify_dedup" ) ) ) {
240 0 : FD_LOG_ERR(( "dedup tile has unexpected input links %lu %lu %s",
241 0 : tile->in_cnt, i, topo->links[ tile->in_link_id[ i ] ].name ));
242 0 : }
243 0 : }
244 0 : }
245 :
246 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
247 0 : fd_dedup_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
248 0 : fd_tcache_t * tcache = fd_tcache_join( fd_tcache_new( FD_SCRATCH_ALLOC_APPEND( l, FD_TCACHE_ALIGN, fd_tcache_footprint( tile->dedup.tcache_depth, 0) ), tile->dedup.tcache_depth, 0 ) );
249 0 : if( FD_UNLIKELY( !tcache ) ) FD_LOG_ERR(( "fd_tcache_new failed" ));
250 :
251 0 : ctx->tcache_depth = fd_tcache_depth ( tcache );
252 0 : ctx->tcache_map_cnt = fd_tcache_map_cnt ( tcache );
253 0 : ctx->tcache_sync = fd_tcache_oldest_laddr( tcache );
254 0 : ctx->tcache_ring = fd_tcache_ring_laddr ( tcache );
255 0 : ctx->tcache_map = fd_tcache_map_laddr ( tcache );
256 :
257 0 : FD_TEST( tile->in_cnt<=sizeof( ctx->in )/sizeof( ctx->in[ 0 ] ) );
258 0 : ctx->unparsed_in_cnt = unparsed_in_cnt;
259 0 : for( ulong i=0; i<tile->in_cnt; i++ ) {
260 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
261 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
262 :
263 0 : ctx->in[i].mem = link_wksp->wksp;
264 0 : ctx->in[i].chunk0 = fd_dcache_compact_chunk0( ctx->in[i].mem, link->dcache );
265 0 : ctx->in[i].wmark = fd_dcache_compact_wmark ( ctx->in[i].mem, link->dcache, link->mtu );
266 0 : }
267 :
268 0 : ctx->out_mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ 0 ] ].dcache_obj_id ].wksp_id ].wksp;
269 0 : ctx->out_chunk0 = fd_dcache_compact_chunk0( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache );
270 0 : ctx->out_wmark = fd_dcache_compact_wmark ( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache, topo->links[ tile->out_link_id[ 0 ] ].mtu );
271 0 : ctx->out_chunk = ctx->out_chunk0;
272 :
273 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
274 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
275 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
276 0 : }
277 :
278 : static ulong
279 : populate_allowed_seccomp( fd_topo_t const * topo,
280 : fd_topo_tile_t const * tile,
281 : ulong out_cnt,
282 0 : struct sock_filter * out ) {
283 0 : (void)topo;
284 0 : (void)tile;
285 :
286 0 : populate_sock_filter_policy_dedup( out_cnt, out, (uint)fd_log_private_logfile_fd() );
287 0 : return sock_filter_policy_dedup_instr_cnt;
288 0 : }
289 :
290 : static ulong
291 : populate_allowed_fds( fd_topo_t const * topo,
292 : fd_topo_tile_t const * tile,
293 : ulong out_fds_cnt,
294 0 : int * out_fds ) {
295 0 : (void)topo;
296 0 : (void)tile;
297 :
298 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
299 :
300 0 : ulong out_cnt = 0UL;
301 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
302 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
303 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
304 0 : return out_cnt;
305 0 : }
306 :
307 0 : #define STEM_BURST (1UL)
308 :
309 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_dedup_ctx_t
310 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_dedup_ctx_t)
311 :
312 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
313 0 : #define STEM_CALLBACK_DURING_FRAG during_frag
314 0 : #define STEM_CALLBACK_AFTER_FRAG after_frag
315 :
316 : #include "../../../../disco/stem/fd_stem.c"
317 :
318 : fd_topo_run_tile_t fd_tile_dedup = {
319 : .name = "dedup",
320 : .populate_allowed_seccomp = populate_allowed_seccomp,
321 : .populate_allowed_fds = populate_allowed_fds,
322 : .scratch_align = scratch_align,
323 : .scratch_footprint = scratch_footprint,
324 : .privileged_init = privileged_init,
325 : .unprivileged_init = unprivileged_init,
326 : .run = stem_run,
327 : };
|