Line data Source code
1 : #include "../../../../disco/tiles.h"
2 : #include "fd_verify.h"
3 :
4 : #include "generated/dedup_seccomp.h"
5 :
6 : #include "../../../../disco/metrics/fd_metrics.h"
7 :
8 : #include <linux/unistd.h>
9 :
/* fd_dedup provides services to deduplicate multiple streams of input
   fragments and present them to a mix of reliable and unreliable
   consumers as though they were generated by a single multi-stream
   producer.

   The dedup tile is a small set of callbacks driven by the stem run
   loop (fd_stem.c, included near the end of this file) that checks
   the transaction signature field for duplicates and filters them
   out. */
18 :
/* Kinds of input link feeding the dedup tile.  The kind of each input
   is derived from the link name in unprivileged_init.  Gossip and
   voter frags are wrapped in an fd_txn_m_t and parsed by this tile;
   verify frags are copied through as received. */

#define IN_KIND_GOSSIP (0UL) /* link named "gossip_dedup" */
#define IN_KIND_VOTER  (1UL) /* link named "voter_dedup"  */
#define IN_KIND_VERIFY (2UL) /* link named "verify_dedup" */
22 :
23 : /* fd_dedup_in_ctx_t is a context object for each in (producer) mcache
24 : connected to the dedup tile. */
25 :
26 : typedef struct {
27 : fd_wksp_t * mem;
28 : ulong chunk0;
29 : ulong wmark;
30 : } fd_dedup_in_ctx_t;
31 :
32 : /* fd_dedup_ctx_t is the context object provided to callbacks from the
33 : mux tile, and contains all state needed to progress the tile. */
34 :
35 : typedef struct {
36 : ulong tcache_depth; /* == fd_tcache_depth( tcache ), depth of this dedups's tcache (const) */
37 : ulong tcache_map_cnt; /* == fd_tcache_map_cnt( tcache ), number of slots to use for tcache map (const) */
38 : ulong * tcache_sync; /* == fd_tcache_oldest_laddr( tcache ), local join to the oldest key in the tcache */
39 : ulong * tcache_ring;
40 : ulong * tcache_map;
41 :
42 : ulong in_kind[ 64UL ];
43 : fd_dedup_in_ctx_t in[ 64UL ];
44 :
45 : fd_wksp_t * out_mem;
46 : ulong out_chunk0;
47 : ulong out_wmark;
48 : ulong out_chunk;
49 :
50 : ulong hashmap_seed;
51 :
52 : struct {
53 : ulong dedup_fail_cnt;
54 : } metrics;
55 : } fd_dedup_ctx_t;
56 :
57 : FD_FN_CONST static inline ulong
58 3 : scratch_align( void ) {
59 3 : return alignof( fd_dedup_ctx_t );
60 3 : }
61 :
62 : FD_FN_PURE static inline ulong
63 3 : scratch_footprint( fd_topo_tile_t const * tile ) {
64 3 : ulong l = FD_LAYOUT_INIT;
65 3 : l = FD_LAYOUT_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
66 3 : l = FD_LAYOUT_APPEND( l, fd_tcache_align(), fd_tcache_footprint( tile->dedup.tcache_depth, 0UL ) );
67 3 : return FD_LAYOUT_FINI( l, scratch_align() );
68 3 : }
69 :
70 : static inline void
71 0 : metrics_write( fd_dedup_ctx_t * ctx ) {
72 0 : FD_MCNT_SET( DEDUP, TRANSACTION_DEDUP_FAILURE, ctx->metrics.dedup_fail_cnt );
73 0 : }
74 :
75 : /* during_frag is called between pairs for sequence number checks, as
76 : we are reading incoming frags. We don't actually need to copy the
77 : fragment here, flow control prevents it getting overrun, and
78 : downstream consumers could reuse the same chunk and workspace to
79 : improve performance.
80 :
81 : The bounds checking and copying here are defensive measures,
82 :
83 : * In a functioning system, the bounds checking should never fail,
84 : but we want to prevent an attacker with code execution on a producer
85 : tile from trivially being able to jump to a consumer tile with
86 : out of bounds chunks.
87 :
88 : * For security reasons, we have chosen to isolate all workspaces from
89 : one another, so for example, if the QUIC tile is compromised with
90 : RCE, it cannot wait until the sigverify tile has verified a transaction,
91 : and then overwrite the transaction while it's being processed by the
92 : banking stage. */
93 :
94 : static inline void
95 : during_frag( fd_dedup_ctx_t * ctx,
96 : ulong in_idx,
97 : ulong seq,
98 : ulong sig,
99 : ulong chunk,
100 0 : ulong sz ) {
101 0 : (void)seq;
102 0 : (void)sig;
103 :
104 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>FD_TPU_PARSED_MTU ) )
105 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
106 :
107 0 : uchar * src = (uchar *)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
108 0 : uchar * dst = (uchar *)fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
109 :
110 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP || ctx->in_kind[ in_idx ]==IN_KIND_VOTER ) ) {
111 0 : if( FD_UNLIKELY( sz>FD_TPU_MTU ) ) FD_LOG_ERR(( "received a gossip or voter transaction that was too large" ));
112 :
113 0 : fd_txn_m_t * txnm = (fd_txn_m_t *)dst;
114 0 : txnm->payload_sz = (ushort)sz;
115 0 : fd_memcpy( fd_txn_m_payload( txnm ), src, sz );
116 0 : } else {
117 0 : fd_memcpy( dst, src, sz );
118 0 : }
119 0 : }
120 :
121 : /* After the transaction has been fully received, and we know we were
122 : not overrun while reading it, check if it's a duplicate of a prior
123 : transaction.
124 :
125 : If the transaction came in from the gossip link, then it hasn't been
126 : parsed by us. So parse it here if necessary. */
127 :
128 : static inline void
129 : after_frag( fd_dedup_ctx_t * ctx,
130 : ulong in_idx,
131 : ulong seq,
132 : ulong sig,
133 : ulong sz,
134 : ulong tsorig,
135 0 : fd_stem_context_t * stem ) {
136 0 : (void)seq;
137 0 : (void)sig;
138 0 : (void)sz;
139 :
140 0 : fd_txn_m_t * txnm = (fd_txn_m_t *)fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
141 0 : fd_txn_t * txn = fd_txn_m_txn_t( txnm );
142 :
143 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP || ctx->in_kind[ in_idx]==IN_KIND_VOTER ) ) {
144 : /* Transactions coming in from these links are not parsed.
145 :
146 : We'll need to parse it so it's ready for downstream consumers.
147 : Equally importantly, we need to parse to extract the signature
148 : for dedup. Just parse it right into the output dcache. */
149 0 : txnm->txn_t_sz = (ushort)fd_txn_parse( fd_txn_m_payload( txnm ), txnm->payload_sz, txn, NULL );
150 0 : if( FD_UNLIKELY( !txnm->txn_t_sz ) ) FD_LOG_ERR(( "fd_txn_parse failed for vote transactions that should have been sigverified" ));
151 :
152 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP ) ) FD_MCNT_INC( DEDUP, GOSSIPED_VOTES_RECEIVED, 1UL );
153 0 : }
154 :
155 : /* Compute fd_hash(signature) for dedup. */
156 0 : ulong ha_dedup_tag = fd_hash( ctx->hashmap_seed, fd_txn_m_payload( txnm )+txn->signature_off, 64UL );
157 :
158 0 : int is_dup;
159 0 : FD_TCACHE_INSERT( is_dup, *ctx->tcache_sync, ctx->tcache_ring, ctx->tcache_depth, ctx->tcache_map, ctx->tcache_map_cnt, ha_dedup_tag );
160 0 : if( FD_LIKELY( is_dup ) ) {
161 0 : ctx->metrics.dedup_fail_cnt++;
162 0 : } else {
163 0 : ulong realized_sz = fd_txn_m_realized_footprint( txnm, 0 );
164 0 : ulong tspub = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
165 0 : fd_stem_publish( stem, 0UL, 0, ctx->out_chunk, realized_sz, 0UL, tsorig, tspub );
166 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, realized_sz, ctx->out_chunk0, ctx->out_wmark );
167 0 : }
168 0 : }
169 :
170 : static void
171 : privileged_init( fd_topo_t * topo,
172 0 : fd_topo_tile_t * tile ) {
173 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
174 :
175 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
176 0 : fd_dedup_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
177 0 : FD_TEST( fd_rng_secure( &ctx->hashmap_seed, 8U ) );
178 0 : }
179 :
180 : static void
181 : unprivileged_init( fd_topo_t * topo,
182 0 : fd_topo_tile_t * tile ) {
183 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
184 :
185 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
186 0 : fd_dedup_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_dedup_ctx_t ), sizeof( fd_dedup_ctx_t ) );
187 0 : fd_tcache_t * tcache = fd_tcache_join( fd_tcache_new( FD_SCRATCH_ALLOC_APPEND( l, fd_tcache_align(), fd_tcache_footprint( tile->dedup.tcache_depth, 0) ), tile->dedup.tcache_depth, 0 ) );
188 0 : if( FD_UNLIKELY( !tcache ) ) FD_LOG_ERR(( "fd_tcache_new failed" ));
189 :
190 0 : ctx->tcache_depth = fd_tcache_depth ( tcache );
191 0 : ctx->tcache_map_cnt = fd_tcache_map_cnt ( tcache );
192 0 : ctx->tcache_sync = fd_tcache_oldest_laddr( tcache );
193 0 : ctx->tcache_ring = fd_tcache_ring_laddr ( tcache );
194 0 : ctx->tcache_map = fd_tcache_map_laddr ( tcache );
195 :
196 0 : FD_TEST( tile->in_cnt<=sizeof( ctx->in )/sizeof( ctx->in[ 0 ] ) );
197 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
198 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
199 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
200 :
201 0 : ctx->in[i].mem = link_wksp->wksp;
202 0 : ctx->in[i].chunk0 = fd_dcache_compact_chunk0( ctx->in[i].mem, link->dcache );
203 0 : ctx->in[i].wmark = fd_dcache_compact_wmark ( ctx->in[i].mem, link->dcache, link->mtu );
204 :
205 0 : if( FD_UNLIKELY( !strcmp( link->name, "gossip_dedup" ) ) ) {
206 0 : ctx->in_kind[ i ] = IN_KIND_GOSSIP;
207 0 : } else if( FD_UNLIKELY( !strcmp( link->name, "voter_dedup" ) ) ) {
208 0 : ctx->in_kind[ i ] = IN_KIND_VOTER;
209 0 : } else if( FD_UNLIKELY( !strcmp( link->name, "verify_dedup" ) ) ) {
210 0 : ctx->in_kind[ i ] = IN_KIND_VERIFY;
211 0 : } else {
212 0 : FD_LOG_ERR(( "unexpected link name %s", link->name ));
213 0 : }
214 0 : }
215 :
216 0 : ctx->out_mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ 0 ] ].dcache_obj_id ].wksp_id ].wksp;
217 0 : ctx->out_chunk0 = fd_dcache_compact_chunk0( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache );
218 0 : ctx->out_wmark = fd_dcache_compact_wmark ( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache, topo->links[ tile->out_link_id[ 0 ] ].mtu );
219 0 : ctx->out_chunk = ctx->out_chunk0;
220 :
221 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
222 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
223 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
224 0 : }
225 :
226 : static ulong
227 : populate_allowed_seccomp( fd_topo_t const * topo,
228 : fd_topo_tile_t const * tile,
229 : ulong out_cnt,
230 0 : struct sock_filter * out ) {
231 0 : (void)topo;
232 0 : (void)tile;
233 :
234 0 : populate_sock_filter_policy_dedup( out_cnt, out, (uint)fd_log_private_logfile_fd() );
235 0 : return sock_filter_policy_dedup_instr_cnt;
236 0 : }
237 :
238 : static ulong
239 : populate_allowed_fds( fd_topo_t const * topo,
240 : fd_topo_tile_t const * tile,
241 : ulong out_fds_cnt,
242 0 : int * out_fds ) {
243 0 : (void)topo;
244 0 : (void)tile;
245 :
246 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
247 :
248 0 : ulong out_cnt = 0UL;
249 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
250 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
251 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
252 0 : return out_cnt;
253 0 : }
254 :
255 0 : #define STEM_BURST (1UL)
256 :
257 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_dedup_ctx_t
258 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_dedup_ctx_t)
259 :
260 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
261 0 : #define STEM_CALLBACK_DURING_FRAG during_frag
262 0 : #define STEM_CALLBACK_AFTER_FRAG after_frag
263 :
264 : #include "../../../../disco/stem/fd_stem.c"
265 :
266 : fd_topo_run_tile_t fd_tile_dedup = {
267 : .name = "dedup",
268 : .populate_allowed_seccomp = populate_allowed_seccomp,
269 : .populate_allowed_fds = populate_allowed_fds,
270 : .scratch_align = scratch_align,
271 : .scratch_footprint = scratch_footprint,
272 : .privileged_init = privileged_init,
273 : .unprivileged_init = unprivileged_init,
274 : .run = stem_run,
275 : };
|