Line data Source code
1 : /* This directory provides the 'fddev quic-trace' subcommand.
2 :
3 : The goal of quic-trace is to tap QUIC traffic on a live system, which
4 : requires encryption keys and other annoying connection state.
5 :
6 : quic-trace does this by tapping into the shared memory segments of a
7 : target tile running on the same host. It does so strictly read-only
8 : to minimize impact to a production system.
9 :
10 : This file (fd_quic_trace_main.c) provides the glue code required to
11 : join remote target tile objects.
12 :
13 : fd_quic_trace_rx_tile.c provides a fd_tango consumer for incoming
14 : QUIC packets. */
15 :
16 : #include "fd_quic_trace.h"
17 :
18 : #include "../../../../disco/metrics/fd_metrics.h"
19 : #include "../../../../disco/quic/fd_quic_tile.h"
20 : #include "../../../../discof/send/fd_send_tile.h"
21 : #include "../../../../waltz/quic/log/fd_quic_log_user.h"
22 : #include "../../../../ballet/hex/fd_hex.h"
23 : #include <stdlib.h>
24 :
25 : /* Define global variables */
26 :
27 : void const * fd_quic_trace_tile_ctx_remote;
28 : ulong fd_quic_trace_tile_ctx_raddr;
29 : ulong ** fd_quic_trace_target_fseq;
30 : ulong volatile * fd_quic_trace_link_metrics;
31 : void const * fd_quic_trace_log_base;
32 : peer_conn_id_map_t _fd_quic_trace_peer_map[1UL << PEER_MAP_LG_SLOT_CNT];
33 : peer_conn_id_map_t * fd_quic_trace_peer_map;
34 :
35 0 : #define EVENT_STREAM 0
36 0 : #define EVENT_ERROR 1
37 :
38 : static void
39 : quic_trace_cmd_args( int * pargc,
40 : char *** pargv,
41 0 : args_t * args ) {
42 0 : char const * event = fd_env_strip_cmdline_cstr( pargc, pargv, "--event", NULL, "stream" );
43 0 : if( 0==strcmp( event, "stream" ) ) {
44 0 : args->quic_trace.event = EVENT_STREAM;
45 0 : } else if( 0==strcmp( event, "error" ) ) {
46 0 : args->quic_trace.event = EVENT_ERROR;
47 0 : } else {
48 0 : FD_LOG_ERR(( "Unsupported QUIC event type \"%s\"", event ));
49 0 : }
50 :
51 0 : args->quic_trace.dump = fd_env_strip_cmdline_contains( pargc, pargv, "--dump" );
52 0 : args->quic_trace.dump_config = fd_env_strip_cmdline_contains( pargc, pargv, "--dump-config" );
53 0 : args->quic_trace.dump_conns = fd_env_strip_cmdline_contains( pargc, pargv, "--dump-conns" );
54 0 : args->quic_trace.trace_send = fd_env_strip_cmdline_contains( pargc, pargv, "--send-tile" );
55 0 : }
56 :
57 : static char const *
58 0 : dump_val_enum_role( int role ) {
59 0 : switch( role ) {
60 0 : case FD_QUIC_ROLE_CLIENT:
61 0 : return "ROLE_CLIENT";
62 0 : case FD_QUIC_ROLE_SERVER:
63 0 : return "ROLE_SERVER";
64 0 : default:
65 0 : return "ROLE_UNKNOWN";
66 0 : }
67 0 : }
68 :
/* dump_val_bool returns "false" for 0 and "true" for 1.  Any other
   value yields "invalid", which makes a config field that is expected
   to be in {0,1} but is not stand out in the dump. */

static char const *
dump_val_bool( int value ) {
  if( value==0 ) return "false";
  if( value==1 ) return "true";
  return "invalid";
}
77 :
78 : void
79 0 : dump_quic_config( fd_quic_config_t * config ) {
80 0 : switch( config->role ) {
81 0 : case FD_QUIC_ROLE_CLIENT:
82 0 : FD_LOG_NOTICE(( "CONFIG: role: %d FD_QUIC_ROLE_CLIENT", config->role ));
83 0 : break;
84 0 : case FD_QUIC_ROLE_SERVER:
85 0 : FD_LOG_NOTICE(( "CONFIG: role: %d FD_QUIC_ROLE_SERVER", config->role ));
86 0 : break;
87 0 : default:
88 0 : FD_LOG_NOTICE(( "CONFIG: role: %d UNKNOWN", config->role ));
89 0 : }
90 :
91 0 : #define HEXFMT32 "%02x%02x%02x%02x" "%02x%02x%02x%02x" \
92 0 : "%02x%02x%02x%02x" "%02x%02x%02x%02x" \
93 0 : "%02x%02x%02x%02x" "%02x%02x%02x%02x" \
94 0 : "%02x%02x%02x%02x" "%02x%02x%02x%02x"
95 0 : #define HEXARG32(X) (X)[0], (X)[1], (X)[2], (X)[3], \
96 0 : (X)[4], (X)[5], (X)[6], (X)[7], \
97 0 : (X)[8], (X)[9], (X)[10], (X)[11], \
98 0 : (X)[12], (X)[13], (X)[14], (X)[15], \
99 0 : (X)[16], (X)[17], (X)[18], (X)[19], \
100 0 : (X)[20], (X)[21], (X)[22], (X)[23], \
101 0 : (X)[24], (X)[25], (X)[26], (X)[27], \
102 0 : (X)[28], (X)[29], (X)[30], (X)[31]
103 :
104 0 : #define dump_val_class_enum( NAME, FMT, CLASS, UNIT, VAL ) \
105 0 : FD_LOG_NOTICE(( "CONFIG: " #NAME ": " FMT " - %s", config->NAME, dump_val_enum_##NAME( config->NAME ) ));
106 0 : #define dump_val_class_bool( NAME, FMT, CLASS, UNIT, VAL ) \
107 0 : FD_LOG_NOTICE(( "CONFIG: " #NAME ": " FMT " - %s", config->NAME, dump_val_bool( config->NAME ) ));
108 0 : #define dump_val_class_units( NAME, FMT, CLASS, UNIT, VAL ) \
109 0 : FD_LOG_NOTICE(( "CONFIG: " #NAME ": " FMT " %s", config->NAME, UNIT ));
110 0 : #define dump_val_class_value( NAME, FMT, CLASS, UNIT, VAL ) \
111 0 : FD_LOG_NOTICE(( "CONFIG: " #NAME ": " FMT, config->NAME ));
112 0 : #define dump_val_class_ptr( NAME, FMT, CLASS, UNIT, VAL ) \
113 0 : FD_LOG_NOTICE(( "CONFIG: " #NAME ": 0x%lx", (ulong)config->NAME ));
114 0 : #define dump_val_class_hex32( NAME, FMT, CLASS, UNIT, VAL ) \
115 0 : FD_LOG_NOTICE(( "CONFIG: " #NAME ": 0x" HEXFMT32, HEXARG32(config->NAME) ));
116 :
117 0 : #define dump_val( NAME, FMT, CLASS, UNIT, VAL ) \
118 0 : dump_val_class_##CLASS( NAME, FMT, CLASS, UNIT, VAL )
119 :
120 0 : FD_QUIC_CONFIG_LIST( dump_val, x )
121 0 : }
122 :
123 : static char const *
124 0 : peer_cid_str( fd_quic_conn_t const * conn ) {
125 0 : static char buf[FD_QUIC_MAX_CONN_ID_SZ*2];
126 0 : ulong sz = conn->peer_cids[0].sz;
127 0 : uchar const * cid = conn->peer_cids[0].conn_id;
128 0 : sz = fd_ulong_min( sz, FD_QUIC_MAX_CONN_ID_SZ );
129 :
130 0 : fd_hex_encode( buf, cid, sz );
131 :
132 0 : return buf;
133 0 : }
134 :
135 : static void
136 0 : dump_connection( fd_quic_conn_t const * conn ) {
137 :
138 0 : #define CONN_MEMB_LIST(X,CONN,...) \
139 0 : X( conn_idx, "%u", ( (CONN).conn_idx ), __VA_ARGS__ ) \
140 0 : X( state, "%u", ( (CONN).state ), __VA_ARGS__ ) \
141 0 : X( reason, "%u", ( (CONN).reason ), __VA_ARGS__ ) \
142 0 : X( app_reason, "%u", ( (CONN).app_reason ), __VA_ARGS__ ) \
143 0 : X( tx_ptr, "%p", ( ((void*)(CONN).tx_ptr) ), __VA_ARGS__ ) \
144 0 : X( unacked_sz, "%lu", ( (CONN).unacked_sz ), __VA_ARGS__ ) \
145 0 : X( flags, "%x", ( (CONN).flags ), __VA_ARGS__ ) \
146 0 : X( conn_gen, "%u", ( (CONN).conn_gen ), __VA_ARGS__ ) \
147 0 : X( server, "%d", ( (CONN).server ), __VA_ARGS__ ) \
148 0 : X( established, "%d", ( (CONN).established ), __VA_ARGS__ ) \
149 0 : X( transport_params_set, "%d", ( (CONN).transport_params_set ), __VA_ARGS__ ) \
150 0 : X( called_conn_new, "%d", ( (CONN).called_conn_new ), __VA_ARGS__ ) \
151 0 : X( visited, "%d", ( (CONN).visited ), __VA_ARGS__ ) \
152 0 : X( key_phase, "%d", ( (CONN).key_phase ), __VA_ARGS__ ) \
153 0 : X( key_update, "%d", ( (CONN).key_update ), __VA_ARGS__ ) \
154 0 : X( our_conn_id, "%016lx", ( (CONN).our_conn_id ), __VA_ARGS__ ) \
155 0 : X( peer[0].ip_addr, "%08x", ( (uint)(CONN).peer[0].ip_addr ), __VA_ARGS__ ) \
156 0 : X( peer[0].udp_port, "%u", ( (uint)(CONN).peer[0].udp_port ), __VA_ARGS__ ) \
157 0 : X( handshake_complete, "%d", ( (CONN).handshake_complete ), __VA_ARGS__ ) \
158 0 : X( handshake_done_send, "%d", ( (CONN).handshake_done_send ), __VA_ARGS__ ) \
159 0 : X( handshake_done_ackd, "%d", ( (CONN).handshake_done_ackd ), __VA_ARGS__ ) \
160 0 : X( exp_pkt_number[0], "%lu", ( (CONN).exp_pkt_number[0] ), __VA_ARGS__ ) \
161 0 : X( exp_pkt_number[1], "%lu", ( (CONN).exp_pkt_number[1] ), __VA_ARGS__ ) \
162 0 : X( exp_pkt_number[2], "%lu", ( (CONN).exp_pkt_number[2] ), __VA_ARGS__ ) \
163 0 : X( pkt_number[0], "%lu", ( (CONN).pkt_number[0] ), __VA_ARGS__ ) \
164 0 : X( pkt_number[1], "%lu", ( (CONN).pkt_number[1] ), __VA_ARGS__ ) \
165 0 : X( pkt_number[2], "%lu", ( (CONN).pkt_number[2] ), __VA_ARGS__ ) \
166 0 : X( last_pkt_number[0], "%lu", ( (CONN).last_pkt_number[0] ), __VA_ARGS__ ) \
167 0 : X( last_pkt_number[1], "%lu", ( (CONN).last_pkt_number[1] ), __VA_ARGS__ ) \
168 0 : X( last_pkt_number[2], "%lu", ( (CONN).last_pkt_number[2] ), __VA_ARGS__ ) \
169 0 : X( idle_timeout_ticks, "%lu", ( (CONN).idle_timeout_ticks ), __VA_ARGS__ ) \
170 0 : X( last_activity, "%lu", ( (CONN).last_activity ), __VA_ARGS__ ) \
171 0 : X( last_ack, "%lu", ( (CONN).last_ack ), __VA_ARGS__ ) \
172 0 : X( used_pkt_meta, "%lu", ( (CONN).used_pkt_meta ), __VA_ARGS__ ) \
173 0 : X( peer_cid, "%s", ( peer_cid_str(&(CONN)) ), __VA_ARGS__ )
174 :
175 0 : #define UNPACK(...) __VA_ARGS__
176 0 : #define CONN_MEMB_FMT(NAME,FMT,ARGS,...) " " #NAME "=" FMT
177 0 : #define CONN_MEMB_ARGS(NAME,FMT,ARGS,...) , UNPACK ARGS
178 0 : FD_LOG_NOTICE(( "CONN: "
179 0 : CONN_MEMB_LIST(CONN_MEMB_FMT,*conn,_)
180 0 : CONN_MEMB_LIST(CONN_MEMB_ARGS,*conn,_)
181 0 : ));
182 0 : }
183 :
184 : void
185 : quic_trace_cmd_fn( args_t * args,
186 0 : config_t * config ) {
187 0 : fd_topo_t * topo = &config->topo;
188 0 : fd_topo_join_workspaces( topo, FD_SHMEM_JOIN_MODE_READ_ONLY );
189 0 : fd_topo_fill( topo );
190 :
191 0 : int trace_send = args->quic_trace.trace_send;
192 :
193 0 : char const * tile_names[] = {"quic", "send"};
194 0 : fd_topo_tile_t * target_tile = NULL;
195 0 : for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
196 0 : if( 0==strcmp( topo->tiles[tile_idx].name, tile_names[trace_send] ) ) {
197 0 : target_tile = &topo->tiles[tile_idx];
198 0 : break;
199 0 : }
200 0 : }
201 0 : if( !target_tile ) FD_LOG_ERR(( "%s tile not found in topology", tile_names[trace_send] ));
202 :
203 0 : ulong const target_in_cnt = target_tile->in_cnt;
204 0 : ulong const target_out_cnt = target_tile->out_cnt;
205 0 : if( FD_UNLIKELY( !trace_send && target_in_cnt != 1UL ) ) { /* FIXME */
206 0 : FD_LOG_ERR(( "Sorry, fd_quic_trace does not support multiple net tiles yet" ));
207 0 : }
208 :
209 : /* Ugly: fd_quic_ctx_t uses non-relocatable object addressing.
210 : We need to rebase pointers. _remote{...} is local pointer to original
211 : objects in shared memory, _raddr is the remote address of the original
212 : object. */
213 :
214 0 : fd_quic_trace_tile_ctx_remote = fd_topo_obj_laddr( topo, target_tile->tile_obj_id );
215 0 : ulong quic_raddr = (ulong)tile_member( fd_quic_trace_tile_ctx_remote, quic, trace_send );
216 0 : ulong tile_align = fd_ulong_if( trace_send, alignof(fd_send_tile_ctx_t), alignof(fd_quic_ctx_t) );
217 0 : ulong tile_ctx_sz = fd_ulong_if( trace_send, sizeof(fd_send_tile_ctx_t), sizeof(fd_quic_ctx_t) );
218 0 : fd_quic_trace_tile_ctx_raddr = quic_raddr - fd_ulong_align_up( tile_ctx_sz, fd_ulong_max( tile_align, fd_quic_align() ) );
219 :
220 0 : FD_LOG_INFO(("quic_raddr %p", (void *)quic_raddr));
221 0 : FD_LOG_INFO((
222 0 : "%s tile state at %p in tile address space and %p in local address space",
223 0 : tile_names[trace_send], (void *)fd_quic_trace_tile_ctx_raddr, fd_quic_trace_tile_ctx_remote));
224 :
225 : /* target_net link tracking */
226 0 : char out_link_name[16];
227 0 : snprintf( out_link_name, sizeof(out_link_name), "%s_net", tile_names[trace_send] );
228 0 : ulong link_id = fd_topo_find_link( topo, out_link_name, 0 );
229 0 : if( FD_UNLIKELY( link_id == ULONG_MAX ) ) FD_LOG_ERR(("%s not found", out_link_name));
230 0 : fd_topo_link_t * target_net = &topo->links[link_id];
231 :
232 : /* net_target link tracking */
233 0 : snprintf( out_link_name, sizeof(out_link_name), "net_%s", tile_names[trace_send] );
234 0 : link_id = fd_topo_find_link( topo, out_link_name, 0 );
235 0 : if( FD_UNLIKELY( link_id == ULONG_MAX ) ) FD_LOG_ERR(("%s not found", out_link_name));
236 :
237 0 : fd_topo_link_t * net_target = &topo->links[link_id];
238 0 : fd_net_rx_bounds_t net_in_bounds;
239 0 : fd_net_rx_bounds_init(&net_in_bounds, net_target->dcache);
240 0 : FD_LOG_INFO(("net->%s dcache at %p", tile_names[trace_send], (void *)net_target->dcache));
241 :
242 : /* Join shared memory objects
243 : Mostly nops but verifies object magic numbers to ensure that
244 : derived pointers are correct. */
245 :
246 0 : FD_LOG_INFO(( "Joining fd_quic in %s tile", tile_names[trace_send] ));
247 0 : fd_quic_t * quic_remote = fd_type_pun( translate_ptr( (void*)quic_raddr ) );
248 0 : fd_quic_t * quic = fd_quic_join( quic_remote );
249 0 : if( !quic ) FD_LOG_ERR( ("Failed to join fd_quic in %s tile", tile_names[trace_send]));
250 :
251 : /* build ctx */
252 0 : fd_quic_trace_ctx_t trace_ctx[1] = {
253 0 : {.dump = args->quic_trace.dump,
254 0 : .dump_config = args->quic_trace.dump_config,
255 0 : .dump_conns = args->quic_trace.dump_conns,
256 0 : .trace_send = args->quic_trace.trace_send,
257 0 : .net_out_base = (ulong)fd_wksp_containing(target_net->dcache),
258 0 : .quic = quic,
259 0 : .net_in_bounds = {net_in_bounds} } };
260 :
261 : /* dump config */
262 0 : if( trace_ctx->dump_config ) {
263 0 : dump_quic_config( &quic->config );
264 0 : }
265 :
266 : /* initialize peer conn_id map */
267 0 : void * shmap = peer_conn_id_map_new( _fd_quic_trace_peer_map );
268 0 : peer_conn_id_map_t * peer_map = peer_conn_id_map_join( shmap );
269 :
270 : /* set the global */
271 0 : fd_quic_trace_peer_map = peer_map;
272 :
273 : /* iterate connections - dump and/or insert */
274 :
275 0 : #define CONN_STATE_LIST(X,SEP,...) \
276 0 : X( INVALID , __VA_ARGS__ ) SEP \
277 0 : X( HANDSHAKE , __VA_ARGS__ ) SEP \
278 0 : X( HANDSHAKE_COMPLETE , __VA_ARGS__ ) SEP \
279 0 : X( ACTIVE , __VA_ARGS__ ) SEP \
280 0 : X( PEER_CLOSE , __VA_ARGS__ ) SEP \
281 0 : X( ABORT , __VA_ARGS__ ) SEP \
282 0 : X( CLOSE_PENDING , __VA_ARGS__ ) SEP \
283 0 : X( DEAD , __VA_ARGS__ )
284 0 : ulong conn_cnt = quic->limits.conn_cnt;
285 0 : ulong state_unknown = 0;
286 0 : #define COMMA ,
287 0 : #define _(X,Y) [FD_QUIC_CONN_STATE_##X] = 0
288 0 : ulong state_cnt[] = { CONN_STATE_LIST(_,COMMA,Y) };
289 0 : ulong state_cap = sizeof( state_cnt) / sizeof( state_cnt[0] );
290 0 : #undef _
291 :
292 0 : for( ulong j=0UL; j<conn_cnt; ++j ) {
293 0 : fd_quic_conn_t const * conn = fd_quic_trace_conn_at_idx( quic, j );
294 0 : ulong state = conn->state;
295 0 : ulong * state_bucket = state < state_cap ? &state_cnt[state] : &state_unknown;
296 :
297 0 : (*state_bucket)++;
298 :
299 0 : switch( conn->state ) {
300 0 : case FD_QUIC_CONN_STATE_INVALID:
301 : /* indicates the connection is free */
302 0 : break;
303 0 : default:
304 0 : if( trace_ctx->dump_conns ) {
305 0 : dump_connection( conn );
306 0 : }
307 :
308 : /* add connection to the peer_conn_id_map */
309 :
310 : /* when we receive a one-rtt quic packet, we don't know the conn_id
311 : size, so we assume its longer than 8 bytes, and truncate the rest */
312 0 : ulong key;
313 0 : memcpy( &key, conn->peer_cids[0].conn_id, sizeof( key ) );
314 0 : peer_conn_id_map_t * entry = peer_conn_id_map_insert( peer_map, key );
315 0 : if( entry ) {
316 0 : entry->conn_idx = (uint)j;
317 0 : } else {
318 : /* this is a diagnostics tool, so we'll continue here */
319 0 : FD_LOG_WARNING(( "Peer connection id map full. Continuing with partial functionality" ));
320 0 : }
321 0 : }
322 0 : }
323 :
324 0 : #define _FMT(X,Y) "%s=%lu"
325 0 : #define _ARG(X,Y) #X, state_cnt[FD_QUIC_CONN_STATE_##X]
326 0 : FD_LOG_NOTICE(( "Total connections: %lu "
327 0 : CONN_STATE_LIST(_FMT," ",Y), conn_cnt,
328 0 : CONN_STATE_LIST(_ARG,COMMA,Y) ));
329 0 : #undef _FMT
330 0 : #undef _ARG
331 :
332 : /* Locate original fseq objects
333 : These are monitored to ensure the trace RX tile doesn't skip ahead
334 : of the target tile. */
335 0 : fd_quic_trace_target_fseq = malloc( target_in_cnt * sizeof(ulong) );
336 0 : for( ulong i=0UL; i<target_in_cnt; i++ ) {
337 0 : fd_quic_trace_target_fseq[i] = target_tile->in_link_fseq[i];
338 0 : }
339 :
340 : /* Locate log buffer */
341 :
342 0 : void * log = (void *)((ulong)quic_remote + quic->layout.log_off);
343 0 : fd_quic_log_rx_t log_rx[1];
344 0 : FD_LOG_DEBUG(( "Joining %s log", tile_names[trace_send] ));
345 0 : if( FD_UNLIKELY( !fd_quic_log_rx_join( log_rx, log ) ) ) {
346 0 : FD_LOG_ERR(( "fd_quic_log_rx_join failed" ));
347 0 : }
348 0 : fd_quic_trace_log_base = log_rx->base;
349 :
350 : /* Redirect metadata writes to dummy buffers.
351 : Without this hack, stem_run would attempt to write metadata updates
352 : into the target topology which is read-only. */
353 :
354 : /* ... redirect metric updates */
355 0 : ulong * metrics = aligned_alloc( FD_METRICS_ALIGN, FD_METRICS_FOOTPRINT( target_in_cnt, target_out_cnt ) );
356 0 : if( !metrics ) FD_LOG_ERR(( "out of memory" ));
357 0 : fd_memset( metrics, 0, FD_METRICS_FOOTPRINT( target_in_cnt, target_out_cnt ) );
358 0 : fd_metrics_register( metrics );
359 :
360 0 : fd_quic_trace_link_metrics = fd_metrics_link_in( fd_metrics_base_tl, 0 );
361 :
362 : /* Join net->target link consumer */
363 :
364 0 : fd_frag_meta_t const *rx_mcache = net_target->mcache;
365 0 : fd_frag_meta_t const *tx_mcache = target_net->mcache;
366 :
367 0 : trace_ctx->quic = quic;
368 :
369 0 : FD_LOG_NOTICE(( "quic-trace on %s tile starting ...", tile_names[trace_send] ));
370 0 : switch( args->quic_trace.event ) {
371 0 : case EVENT_STREAM:
372 0 : fd_quic_trace_rx_tile( trace_ctx, rx_mcache, tx_mcache );
373 0 : break;
374 0 : case EVENT_ERROR:
375 0 : fd_quic_trace_log_tile( trace_ctx, log_rx->mcache );
376 0 : break;
377 0 : default:
378 0 : __builtin_unreachable();
379 0 : }
380 :
381 0 : fd_quic_log_rx_leave( log_rx );
382 0 : }
383 :
384 : action_t fd_action_quic_trace = {
385 : .name = "quic-trace",
386 : .args = quic_trace_cmd_args,
387 : .fn = quic_trace_cmd_fn,
388 : .description = "Trace quic tile",
389 : .is_diagnostic = 1
390 : };
|