LCOV - code coverage report
Current view: top level - app/fdctl/run/tiles - fd_quic.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 23 341 6.7 %
Date: 2024-11-13 11:58:15 Functions: 3 21 14.3 %

          Line data    Source code
       1             : #include "../../../../disco/tiles.h"
       2             : 
       3             : #include "generated/quic_seccomp.h"
       4             : 
       5             : #include "../../../../disco/metrics/fd_metrics.h"
       6             : #include "../../../../waltz/quic/fd_quic.h"
       7             : #include "../../../../waltz/xdp/fd_xsk_aio.h"
       8             : #include "../../../../waltz/xdp/fd_xsk.h"
       9             : #include "../../../../waltz/ip/fd_netlink.h"
      10             : #include "../../../../disco/quic/fd_tpu.h"
      11             : 
      12             : #include <linux/unistd.h>
      13             : #include <sys/random.h>
      14             : 
      15             : /* fd_quic provides a TPU server tile.
      16             : 
      17             :    This tile handles incoming transactions that clients request to be
      18             :    included in blocks.  Supported protocols currently include TPU/UDP
      19             :    and TPU/QUIC.
      20             : 
      21             :    The fd_quic tile acts as a plain old Tango producer writing to a cnc
      22             :    and an mcache.  The tile will defragment multi-packet TPU streams
      23             :    coming in from QUIC, such that each mcache/dcache pair forms a
      24             :    complete txn.  This requires the dcache mtu to be at least that of
      25             :    the largest allowed serialized txn size.
      26             : 
      27             :    QUIC tiles don't service network devices directly, but rely on
      28             :    packets being received by net tiles and forwarded on via a mux
      29             :    (multiplexer).  An arbitrary number of QUIC tiles can be run.  Each
      30             :    UDP flow must stick to one QUIC tile. */
      31             : 
      32             : typedef struct {
      33             :   fd_tpu_reasm_t * reasm;
      34             : 
      35             :   fd_stem_context_t * stem;
      36             : 
      37             :   fd_quic_t *      quic;
      38             :   const fd_aio_t * quic_rx_aio;
      39             :   fd_aio_t         quic_tx_aio[1];
      40             : 
      41             : # define ED25519_PRIV_KEY_SZ (32)
      42           0 : # define ED25519_PUB_KEY_SZ  (32)
      43             :   uchar            tls_priv_key[ ED25519_PRIV_KEY_SZ ];
      44             :   uchar            tls_pub_key [ ED25519_PUB_KEY_SZ  ];
      45             :   fd_sha512_t      sha512[1]; /* used for signing */
      46             : 
      47             :   uchar buffer[ FD_NET_MTU ];
      48             : 
      49             :   ulong conn_seq; /* current quic connection sequence number */
      50             : 
      51             :   ulong round_robin_cnt;
      52             :   ulong round_robin_id;
      53             : 
      54             :   fd_wksp_t * in_mem;
      55             :   ulong       in_chunk0;
      56             :   ulong       in_wmark;
      57             : 
      58             :   fd_frag_meta_t * net_out_mcache;
      59             :   ulong *          net_out_sync;
      60             :   ulong            net_out_depth;
      61             :   ulong            net_out_seq;
      62             : 
      63             :   fd_wksp_t * net_out_mem;
      64             :   ulong       net_out_chunk0;
      65             :   ulong       net_out_wmark;
      66             :   ulong       net_out_chunk;
      67             : 
      68             :   fd_wksp_t * verify_out_mem;
      69             : 
      70             :   struct {
      71             :     ulong legacy_reasm_append [ FD_METRICS_COUNTER_QUIC_TILE_NON_QUIC_REASSEMBLY_APPEND_CNT ];
      72             :     ulong legacy_reasm_publish[ FD_METRICS_COUNTER_QUIC_TILE_NON_QUIC_REASSEMBLY_PUBLISH_CNT ];
      73             : 
      74             :     ulong reasm_append [ FD_METRICS_COUNTER_QUIC_TILE_REASSEMBLY_APPEND_CNT ];
      75             :     ulong reasm_publish[ FD_METRICS_COUNTER_QUIC_TILE_REASSEMBLY_PUBLISH_CNT ];
      76             :   } metrics;
      77             : } fd_quic_ctx_t;
      78             : 
      79             : FD_FN_CONST static inline fd_quic_limits_t
      80           3 : quic_limits( fd_topo_tile_t const * tile ) {
      81           3 :   fd_quic_limits_t limits = {
      82           3 :     .conn_cnt      = tile->quic.max_concurrent_connections,
      83           3 :     .handshake_cnt = tile->quic.max_concurrent_handshakes,
      84             : 
      85             :     /* fd_quic will not issue nor use any new connection IDs after
      86             :        completing a handshake.  Connection migration is not supported
      87             :        either. */
      88           3 :     .conn_id_cnt      = FD_QUIC_MIN_CONN_ID_CNT,
      89           3 :     .inflight_pkt_cnt = tile->quic.max_inflight_quic_packets,
      90           3 :     .tx_buf_sz        = 0,
      91           3 :     .rx_stream_cnt    = tile->quic.max_concurrent_streams_per_connection,
      92           3 :     .stream_pool_cnt  = tile->quic.max_concurrent_streams_per_connection * tile->quic.max_concurrent_connections,
      93           3 :   };
      94           3 :   return limits;
      95           3 : }
      96             : 
      97             : FD_FN_CONST static inline ulong
      98           3 : scratch_align( void ) {
      99           3 :   return 4096UL;
     100           3 : }
     101             : 
     102             : FD_FN_PURE static inline ulong
     103           3 : scratch_footprint( fd_topo_tile_t const * tile ) {
     104           3 :   fd_quic_limits_t limits = quic_limits( tile );
     105           3 :   ulong            l      = FD_LAYOUT_INIT;
     106           3 :   l = FD_LAYOUT_APPEND( l, alignof( fd_quic_ctx_t ), sizeof( fd_quic_ctx_t )      );
     107           3 :   l = FD_LAYOUT_APPEND( l, fd_aio_align(),           fd_aio_footprint()           );
     108           3 :   l = FD_LAYOUT_APPEND( l, fd_quic_align(),          fd_quic_footprint( &limits ) );
     109           3 :   return FD_LAYOUT_FINI( l, scratch_align() );
     110           3 : }
     111             : 
     112             : /* legacy_stream_notify is called for transactions sent via TPU/UDP. For
     113             :    now both QUIC and non-QUIC transactions are accepted, with traffic
     114             :    type determined by port.
     115             : 
     116             :    UDP transactions must fit in one packet and cannot be fragmented, and
     117             :    notify here means the entire packet was received. */
     118             : 
     119             : static void
     120             : legacy_stream_notify( fd_quic_ctx_t * ctx,
     121             :                       uchar *         packet,
     122           0 :                       ulong           packet_sz ) {
     123             : 
     124           0 :   fd_stem_context_t * stem = ctx->stem;
     125             : 
     126           0 :   uint                  tsorig = (uint)fd_frag_meta_ts_comp( fd_tickcount() );
     127           0 :   fd_tpu_reasm_slot_t * slot   = fd_tpu_reasm_prepare( ctx->reasm, tsorig );
     128             : 
     129           0 :   int add_err = fd_tpu_reasm_append( ctx->reasm, slot, packet, packet_sz, 0UL );
     130           0 :   ctx->metrics.legacy_reasm_append[ add_err ]++;
     131           0 :   if( FD_UNLIKELY( add_err!=FD_TPU_REASM_SUCCESS ) ) return;
     132             : 
     133           0 :   uint   tspub = (uint)fd_frag_meta_ts_comp( fd_tickcount() );
     134           0 :   void * base  = ctx->verify_out_mem;
     135           0 :   ulong  seq   = stem->seqs[0];
     136             : 
     137           0 :   int pub_err = fd_tpu_reasm_publish( ctx->reasm, slot, stem->mcaches[0], base, seq, tspub );
     138           0 :   ctx->metrics.legacy_reasm_publish[ pub_err ]++;
     139           0 :   if( FD_UNLIKELY( pub_err!=FD_TPU_REASM_SUCCESS ) ) return;
     140             : 
     141           0 :   fd_stem_advance( stem, 0UL );
     142           0 : }
     143             : 
     144             : /* Because of the separate mcache for publishing network fragments
     145             :    back to networking tiles, which is not managed by the mux, we
     146             :    need to periodically update the sync. */
     147             : static void
     148           0 : during_housekeeping( fd_quic_ctx_t * ctx ) {
     149           0 :   fd_mcache_seq_update( ctx->net_out_sync, ctx->net_out_seq );
     150           0 : }
     151             : 
     152             : /* This tile always publishes messages downstream, even if there are
     153             :    no credits available.  It ignores the flow control of the downstream
     154             :    verify tile.  This is OK as the verify tile is written to expect
     155             :    this behavior, and enables the QUIC tile to publish as fast as it
     156             :    can.  It would currently be difficult trying to backpressure further
     157             :    up the stack to the network itself. */
     158             : static inline void
     159             : before_credit( fd_quic_ctx_t *     ctx,
     160             :                fd_stem_context_t * stem,
     161           0 :                int *               charge_busy ) {
     162           0 :   ctx->stem = stem;
     163             : 
     164             :   /* Publishes to mcache via callbacks */
     165           0 :   *charge_busy = fd_quic_service( ctx->quic );
     166           0 : }
     167             : 
     168             : static inline void
     169           0 : metrics_write( fd_quic_ctx_t * ctx ) {
     170           0 :   FD_MCNT_ENUM_COPY( QUIC_TILE, NON_QUIC_REASSEMBLY_APPEND,  ctx->metrics.legacy_reasm_append );
     171           0 :   FD_MCNT_ENUM_COPY( QUIC_TILE, NON_QUIC_REASSEMBLY_PUBLISH, ctx->metrics.legacy_reasm_publish );
     172           0 :   FD_MCNT_ENUM_COPY( QUIC_TILE, REASSEMBLY_APPEND,           ctx->metrics.reasm_append );
     173           0 :   FD_MCNT_ENUM_COPY( QUIC_TILE, REASSEMBLY_PUBLISH,          ctx->metrics.reasm_publish );
     174             : 
     175           0 :   FD_MCNT_SET(   QUIC, RECEIVED_PACKETS, ctx->quic->metrics.net_rx_pkt_cnt );
     176           0 :   FD_MCNT_SET(   QUIC, RECEIVED_BYTES,   ctx->quic->metrics.net_rx_byte_cnt );
     177           0 :   FD_MCNT_SET(   QUIC, SENT_PACKETS,     ctx->quic->metrics.net_tx_pkt_cnt );
     178           0 :   FD_MCNT_SET(   QUIC, SENT_BYTES,       ctx->quic->metrics.net_tx_byte_cnt );
     179             : 
     180           0 :   FD_MGAUGE_SET( QUIC, CONNECTIONS_ACTIVE,  ctx->quic->metrics.conn_active_cnt );
     181           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_CREATED, ctx->quic->metrics.conn_created_cnt );
     182           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_CLOSED,  ctx->quic->metrics.conn_closed_cnt );
     183           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_ABORTED, ctx->quic->metrics.conn_aborted_cnt );
     184           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_TIMED_OUT, ctx->quic->metrics.conn_timeout_cnt );
     185           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_RETRIED, ctx->quic->metrics.conn_retry_cnt );
     186             : 
     187           0 :   FD_MCNT_SET(   QUIC, CONNECTION_ERROR_NO_SLOTS,   ctx->quic->metrics.conn_err_no_slots_cnt );
     188           0 :   FD_MCNT_SET(   QUIC, CONNECTION_ERROR_TLS_FAIL,   ctx->quic->metrics.conn_err_tls_fail_cnt );
     189           0 :   FD_MCNT_SET(   QUIC, CONNECTION_ERROR_RETRY_FAIL, ctx->quic->metrics.conn_err_retry_fail_cnt );
     190             : 
     191           0 :   FD_MCNT_SET(   QUIC, HANDSHAKES_CREATED,         ctx->quic->metrics.hs_created_cnt );
     192           0 :   FD_MCNT_SET(   QUIC, HANDSHAKE_ERROR_ALLOC_FAIL, ctx->quic->metrics.hs_err_alloc_fail_cnt );
     193             : 
     194           0 :   FD_MCNT_SET(   QUIC, STREAM_OPENED, ctx->quic->metrics.stream_opened_cnt );
     195           0 :   FD_MCNT_ENUM_COPY( QUIC, STREAM_CLOSED, ctx->quic->metrics.stream_closed_cnt );
     196           0 :   FD_MGAUGE_SET( QUIC, STREAM_ACTIVE, ctx->quic->metrics.stream_active_cnt );
     197             : 
     198           0 :   FD_MCNT_SET(  QUIC, STREAM_RECEIVED_EVENTS, ctx->quic->metrics.stream_rx_event_cnt );
     199           0 :   FD_MCNT_SET(  QUIC, STREAM_RECEIVED_BYTES,  ctx->quic->metrics.stream_rx_byte_cnt );
     200             : 
     201           0 :   FD_MCNT_ENUM_COPY( QUIC, RECEIVED_FRAMES, ctx->quic->metrics.frame_rx_cnt );
     202           0 : }
     203             : 
     204             : static int
     205             : before_frag( fd_quic_ctx_t * ctx,
     206             :              ulong           in_idx,
     207             :              ulong           seq,
     208           0 :              ulong           sig ) {
     209           0 :   (void)in_idx;
     210           0 :   (void)seq;
     211             : 
     212           0 :   ulong proto = fd_disco_netmux_sig_proto( sig );
     213           0 :   if( FD_UNLIKELY( proto!=DST_PROTO_TPU_UDP && proto!=DST_PROTO_TPU_QUIC ) ) return 1;
     214             : 
     215           0 :   ulong hash = fd_disco_netmux_sig_hash( sig );
     216           0 :   if( FD_UNLIKELY( (hash % ctx->round_robin_cnt) != ctx->round_robin_id ) ) return 1;
     217             : 
     218           0 :   return 0;
     219           0 : }
     220             : 
     221             : static void
     222             : during_frag( fd_quic_ctx_t * ctx,
     223             :              ulong           in_idx,
     224             :              ulong           seq,
     225             :              ulong           sig,
     226             :              ulong           chunk,
     227           0 :              ulong           sz ) {
     228           0 :   (void)in_idx;
     229           0 :   (void)seq;
     230           0 :   (void)sig;
     231             : 
     232           0 :   if( FD_UNLIKELY( chunk<ctx->in_chunk0 || chunk>ctx->in_wmark || sz > FD_NET_MTU ) )
     233           0 :     FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_chunk0, ctx->in_wmark ));
     234             : 
     235           0 :   uchar * src = (uchar *)fd_chunk_to_laddr( ctx->in_mem, chunk );
     236           0 :   fd_memcpy( ctx->buffer, src, sz ); /* TODO: Eliminate copy... fd_aio needs refactoring */
     237           0 : }
     238             : 
     239             : static void
     240             : after_frag( fd_quic_ctx_t *     ctx,
     241             :             ulong               in_idx,
     242             :             ulong               seq,
     243             :             ulong               sig,
     244             :             ulong               chunk,
     245             :             ulong               sz,
     246             :             ulong               tsorig,
     247           0 :             fd_stem_context_t * stem ) {
     248           0 :   (void)in_idx;
     249           0 :   (void)seq;
     250           0 :   (void)chunk;
     251           0 :   (void)tsorig;
     252           0 :   (void)stem;
     253             : 
     254           0 :   ulong proto = fd_disco_netmux_sig_proto( sig );
     255             : 
     256           0 :   if( FD_LIKELY( proto==DST_PROTO_TPU_QUIC ) ) {
     257           0 :     fd_aio_pkt_info_t pkt = { .buf = ctx->buffer, .buf_sz = (ushort)sz };
     258           0 :     fd_aio_send( ctx->quic_rx_aio, &pkt, 1, NULL, 1 );
     259           0 :   } else if( FD_LIKELY( proto==DST_PROTO_TPU_UDP ) ) {
     260           0 :     ulong network_hdr_sz = fd_disco_netmux_sig_hdr_sz( sig );
     261           0 :     if( FD_UNLIKELY( sz<=network_hdr_sz ) ) {
     262             :       /* Transaction not valid if the packet isn't large enough for the network
     263             :          headers. */
     264           0 :       FD_MCNT_INC( QUIC_TILE, NON_QUIC_PACKET_TOO_SMALL, 1UL );
     265           0 :       return;
     266           0 :     }
     267             : 
     268           0 :     ulong data_sz = sz - network_hdr_sz;
     269           0 :     if( FD_UNLIKELY( data_sz<FD_TXN_MIN_SERIALIZED_SZ ) ) {
     270             :       /* Smaller than the smallest possible transaction */
     271           0 :       FD_MCNT_INC( QUIC_TILE, NON_QUIC_PACKET_TOO_SMALL, 1UL );
     272           0 :       return;
     273           0 :     }
     274             : 
     275           0 :     if( FD_UNLIKELY( data_sz>FD_TPU_MTU ) ) {
     276             :       /* Transaction couldn't possibly be valid if it's longer than transaction
     277             :          MTU so drop it. This is not required, as the txn will fail to parse,
     278             :          but it's a nice short circuit. */
     279           0 :       FD_MCNT_INC( QUIC_TILE, NON_QUIC_PACKET_TOO_LARGE, 1UL );
     280           0 :       return;
     281           0 :     }
     282             : 
     283           0 :     legacy_stream_notify( ctx, ctx->buffer+network_hdr_sz, data_sz );
     284           0 :   }
     285           0 : }
     286             : 
     287             : /* quic_now is called by the QUIC engine to get the current timestamp in
     288             :    UNIX time.  */
     289             : 
     290             : static ulong
     291           0 : quic_now( void * ctx ) {
     292           0 :   (void)ctx;
     293           0 :   return (ulong)fd_log_wallclock();
     294           0 : }
     295             : 
     296             : /* quic_conn_new is invoked by the QUIC engine whenever a new connection
     297             :    is being established. */
     298             : static void
     299             : quic_conn_new( fd_quic_conn_t * conn,
     300           0 :                void *           _ctx ) {
     301           0 :   fd_quic_ctx_t * ctx = (fd_quic_ctx_t *)_ctx;
     302             : 
     303           0 :   conn->local_conn_id = ++ctx->conn_seq;
     304           0 : }
     305             : 
     306             : /* quic_stream_new is called back by the QUIC engine whenever an open
     307             :    connection creates a new stream.  At the time this is called, both the
     308             :    client and server must have agreed to open the stream.  In case the
     309             :    client has opened this stream, it is assumed that the QUIC
     310             :    implementation has verified that the client has the necessary stream
     311             :    quota to do so. */
     312             : 
     313             : static void
     314             : quic_stream_new( fd_quic_stream_t * stream,
     315           0 :                  void *             _ctx ) {
     316             : 
     317             :   /* Load QUIC state */
     318             : 
     319           0 :   fd_quic_ctx_t * ctx = (fd_quic_ctx_t *)_ctx;
     320             : 
     321           0 :   ulong conn_id   = stream->conn->local_conn_id;
     322           0 :   ulong stream_id = stream->stream_id;
     323             : 
     324             :   /* Acquire reassembly slot */
     325             : 
     326           0 :   uint                  tsorig = (uint)fd_frag_meta_ts_comp( fd_tickcount() );
     327           0 :   fd_tpu_reasm_slot_t * slot   = fd_tpu_reasm_prepare( ctx->reasm, tsorig );
     328             : 
     329           0 :   slot->conn_id   = conn_id;
     330           0 :   slot->stream_id = stream_id;
     331             : 
     332             :   /* Wire up with QUIC stream */
     333             : 
     334           0 :   stream->context = slot;
     335             : 
     336             :   /* Wind up for next iteration */
     337             : 
     338           0 : }
     339             : 
     340             : /* quic_stream_receive is called back by the QUIC engine when any stream
     341             :    in any connection being serviced receives new data.  Currently we
     342             :    simply copy received data out of the xsk (network device memory) into
     343             :    a local dcache. */
     344             : 
     345             : static void
     346             : quic_stream_receive( fd_quic_stream_t * stream,
     347             :                      void *             stream_ctx,
     348             :                      uchar const *      data,
     349             :                      ulong              data_sz,
     350             :                      ulong              offset,
     351           0 :                      int                fin ) {
     352             : 
     353           0 :   (void)fin; /* TODO instantly publish if offset==0UL && fin */
     354             : 
     355             :   /* Load TPU state */
     356             : 
     357           0 :   fd_quic_t *           quic     = stream->conn->quic;
     358           0 :   fd_quic_ctx_t *       quic_ctx = quic->cb.quic_ctx;
     359           0 :   fd_tpu_reasm_t *      reasm    = quic_ctx->reasm;
     360           0 :   fd_tpu_reasm_slot_t * slot     = stream_ctx;
     361           0 :   fd_quic_ctx_t *       ctx    = quic->cb.quic_ctx;
     362             : 
     363             :   /* Check if reassembly slot is still valid */
     364             : 
     365           0 :   ulong conn_id   = stream->conn->local_conn_id;
     366           0 :   ulong stream_id = stream->stream_id;
     367             : 
     368           0 :   if( FD_UNLIKELY( ( slot->conn_id   != conn_id   ) |
     369           0 :                    ( slot->stream_id != stream_id ) ) ) {
     370           0 :     return;  /* clobbered */
     371           0 :   }
     372             : 
     373             :   /* Append data into chunk, we know this is valid */
     374             : 
     375           0 :   int add_err = fd_tpu_reasm_append( reasm, slot, data, data_sz, offset );
     376           0 :   ctx->metrics.reasm_append[ add_err ]++;
     377           0 : }
     378             : 
     379             : /* quic_stream_notify is called back by the QUIC implementation when a
     380             :    stream is finished.  This could either be because it completed
     381             :    successfully after reading valid data, or it was closed prematurely
     382             :    for some other reason.  All streams must eventually notify.
     383             : 
     384             :    If we see a successful QUIC stream notify, it means we have received
     385             :    a full transaction and should publish it downstream to be verified
     386             :    and executed. */
     387             : 
     388             : static void
     389             : quic_stream_notify( fd_quic_stream_t * stream,
     390             :                     void *             stream_ctx,
     391           0 :                     int                type ) {
     392             : 
     393             :   /* Load TPU state */
     394             : 
     395           0 :   fd_quic_t *           quic   = stream->conn->quic;
     396           0 :   fd_quic_ctx_t *       ctx    = quic->cb.quic_ctx;
     397           0 :   fd_tpu_reasm_t *      reasm  = ctx->reasm;
     398           0 :   fd_tpu_reasm_slot_t * slot   = stream_ctx;
     399           0 :   fd_stem_context_t *   stem   = ctx->stem;
     400           0 :   fd_frag_meta_t *      mcache = stem->mcaches[0];
     401           0 :   void *                base   = ctx->verify_out_mem;
     402             : 
     403             :   /* Check if reassembly slot is still valid */
     404             : 
     405           0 :   ulong conn_id   = stream->conn->local_conn_id;
     406           0 :   ulong stream_id = stream->stream_id;
     407             : 
     408           0 :   if( FD_UNLIKELY( ( slot->conn_id   != conn_id   ) |
     409           0 :                    ( slot->stream_id != stream_id ) ) ) {
     410           0 :     FD_MCNT_INC( QUIC_TILE, REASSEMBLY_NOTIFY_CLOBBERED, 1UL );
     411           0 :     return;  /* clobbered */
     412           0 :   }
     413             : 
     414             :   /* Abort reassembly slot if QUIC stream closes non-gracefully */
     415             : 
     416           0 :   if( FD_UNLIKELY( type!=FD_QUIC_STREAM_NOTIFY_END ) ) {
     417           0 :     FD_MCNT_INC( QUIC_TILE, REASSEMBLY_NOTIFY_ABORTED, 1UL );
     418           0 :     fd_tpu_reasm_cancel( reasm, slot );
     419           0 :     return;  /* not a successful stream close */
     420           0 :   }
     421             : 
     422             :   /* Publish message */
     423             : 
     424           0 :   ulong  seq   = stem->seqs[0];
     425           0 :   uint   tspub = (uint)fd_frag_meta_ts_comp( fd_tickcount() );
     426           0 :   int pub_err = fd_tpu_reasm_publish( reasm, slot, mcache, base, seq, tspub );
     427           0 :   ctx->metrics.reasm_publish[ pub_err ]++;
     428           0 :   if( FD_UNLIKELY( pub_err!=FD_TPU_REASM_SUCCESS ) ) return;
     429             : 
     430           0 :   fd_stem_advance( stem, 0UL );
     431           0 : }
     432             : 
     433             : static int
     434             : quic_tx_aio_send( void *                    _ctx,
     435             :                   fd_aio_pkt_info_t const * batch,
     436             :                   ulong                     batch_cnt,
     437             :                   ulong *                   opt_batch_idx,
     438           0 :                   int                       flush ) {
     439           0 :   (void)flush;
     440             : 
     441           0 :   fd_quic_ctx_t * ctx = (fd_quic_ctx_t *)_ctx;
     442             : 
     443           0 :   for( ulong i=0; i<batch_cnt; i++ ) {
     444           0 :     void * dst = fd_chunk_to_laddr( ctx->net_out_mem, ctx->net_out_chunk );
     445           0 :     fd_memcpy( dst, batch[ i ].buf, batch[ i ].buf_sz );
     446             : 
     447           0 :     uchar const * packet = dst;
     448           0 :     uchar const * packet_end = packet + batch[i].buf_sz;
     449           0 :     uchar const * iphdr = packet + 14U;
     450             : 
     451           0 :     uint test_ethip = ( (uint)packet[12] << 16u ) | ( (uint)packet[13] << 8u ) | (uint)packet[23];
     452           0 :     uint   ip_dstaddr  = 0;
     453           0 :     if( FD_LIKELY( test_ethip==0x080011 ) ) {
     454             :       /* IPv4 is variable-length, so lookup IHL to find start of UDP */
     455           0 :       uint iplen = ( ( (uint)iphdr[0] ) & 0x0FU ) * 4U;
     456           0 :       uchar const * udp = iphdr + iplen;
     457             : 
     458             :       /* Ignore if UDP header is too short */
     459           0 :       if( FD_UNLIKELY( udp+8U>packet_end ) ) {
     460           0 :         FD_MCNT_INC( QUIC_TILE, QUIC_PACKET_TOO_SMALL, 1UL );
     461           0 :         continue;
     462           0 :       }
     463             : 
     464             :       /* Extract IP dest addr */
     465           0 :       ip_dstaddr  =                  *(uint   *)( iphdr+16UL );
     466           0 :     }
     467             : 
     468             :     /* send packets are just round-robined by sequence number, so for now
     469             :        just indicate where they came from so they don't bounce back */
     470           0 :     ulong sig = fd_disco_netmux_sig( 0U, 0U, ip_dstaddr, DST_PROTO_OUTGOING, FD_NETMUX_SIG_MIN_HDR_SZ );
     471             : 
     472           0 :     ulong tspub  = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
     473           0 :     fd_mcache_publish( ctx->net_out_mcache,
     474           0 :                        ctx->net_out_depth,
     475           0 :                        ctx->net_out_seq,
     476           0 :                        sig,
     477           0 :                        ctx->net_out_chunk,
     478           0 :                        batch[ i ].buf_sz,
     479           0 :                        0,
     480           0 :                        0,
     481           0 :                        tspub );
     482             : 
     483           0 :     ctx->net_out_seq   = fd_seq_inc( ctx->net_out_seq, 1UL );
     484           0 :     ctx->net_out_chunk = fd_dcache_compact_next( ctx->net_out_chunk, FD_NET_MTU, ctx->net_out_chunk0, ctx->net_out_wmark );
     485           0 :   }
     486             : 
     487           0 :   if( FD_LIKELY( opt_batch_idx ) ) {
     488           0 :     *opt_batch_idx = batch_cnt;
     489           0 :   }
     490             : 
     491           0 :   return FD_AIO_SUCCESS;
     492           0 : }
     493             : 
     494             : static void
     495             : privileged_init( fd_topo_t *      topo,
     496           0 :                  fd_topo_tile_t * tile ) {
     497           0 :   (void)topo; (void)tile;
     498             : 
     499             :   /* The fd_quic implementation calls fd_log_wallclock() internally
     500             :      which itself calls clock_gettime() which on most kernels is not a
     501             :      real syscall but a virtual one in the process via the vDSO.
     502             : 
     503             :      The first time this virtual call is made to the vDSO it does an
     504             :      mmap(2) of some shared memory into userspace, which cannot
     505             :      happen while sandboxed so we need to ensure that initialization
     506             :      happens here. */
     507             : 
     508           0 :   fd_log_wallclock();
     509           0 : }
     510             : 
     511             : static void
     512             : quic_tls_cv_sign( void *      signer_ctx,
     513             :                   uchar       signature[ static 64 ],
     514           0 :                   uchar const payload[ static 130 ] ) {
     515           0 :   fd_quic_ctx_t * ctx = signer_ctx;
     516           0 :   fd_sha512_t * sha512 = fd_sha512_join( ctx->sha512 );
     517           0 :   fd_ed25519_sign( signature, payload, 130UL, ctx->tls_pub_key, ctx->tls_priv_key, sha512 );
     518           0 :   fd_sha512_leave( sha512 );
     519           0 : }
     520             : /* unprivileged_init builds the quic tile context in the tile's
                     :    scratch region.  It validates the tile's link layout, creates a
                     :    random Ed25519 identity for TLS, constructs and configures the
                     :    fd_quic server instance, and records the input / output link
                     :    bounds the tile uses afterwards.  Every validation failure is
                     :    reported through FD_LOG_ERR, which does not return.
                     :
                     :    topo  topology holding the links, objects and workspaces
                     :    tile  this tile's topology entry (link ids and quic config) */
     521             : static void
     522             : unprivileged_init( fd_topo_t *      topo,
     523           0 :                    fd_topo_tile_t * tile ) {
     524           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
     525             : 
                     :   /* The link counts are checked before any link name is read, so
                     :      that neither the comparisons nor the error messages index
                     :      in_link_id / out_link_id beyond the links that exist. */
                     : 
                     :   if( FD_UNLIKELY( tile->in_cnt<1UL ) )
                     :     FD_LOG_ERR(( "quic tile has no input links" ));
                     :   if( FD_UNLIKELY( strcmp( topo->links[ tile->in_link_id[ 0UL ] ].name, "net_quic" ) ) )
                     :     FD_LOG_ERR(( "quic tile has unexpected input link %lu %s",
                     :                  tile->in_cnt, topo->links[ tile->in_link_id[ 0UL ] ].name ));
     530             : 
                     :   if( FD_UNLIKELY( tile->out_cnt!=2UL ) )
                     :     FD_LOG_ERR(( "quic tile has unexpected output link count %lu", tile->out_cnt ));
                     :   if( FD_UNLIKELY( strcmp( topo->links[ tile->out_link_id[ 0UL ] ].name, "quic_verify" ) ||
     533           0 :                    strcmp( topo->links[ tile->out_link_id[ 1UL ] ].name, "quic_net" ) ) )
     534           0 :     FD_LOG_ERR(( "quic tile has none or unexpected output links %lu %s %s",
     535           0 :                  tile->out_cnt, topo->links[ tile->out_link_id[ 0 ] ].name, topo->links[ tile->out_link_id[ 1 ] ].name ));
     536             : 
                     :   /* The quic_verify output link must have the depth the tile was
                     :      configured with. */
     539           0 :   ulong depth = tile->quic.depth;
                     :   ulong out_depth = topo->links[ tile->out_link_id[ 0 ] ].depth;
                     :   if( FD_UNLIKELY( out_depth != depth ) )
                     :     FD_LOG_ERR(( "quic_verify link depth %lu does not match quic tile depth %lu", out_depth, depth ));
     542             : 
     543           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     544           0 :   fd_quic_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_quic_ctx_t ), sizeof( fd_quic_ctx_t ) );
     545             : 
     546             :   /* End privileged allocs */
     547             : 
                     :   /* Fresh random Ed25519 key pair; quic_tls_cv_sign signs with it. */
     548           0 :   FD_TEST( getrandom( ctx->tls_priv_key, ED25519_PRIV_KEY_SZ, 0 )==ED25519_PRIV_KEY_SZ );
     549           0 :   fd_sha512_t * sha512 = fd_sha512_join( fd_sha512_new( ctx->sha512 ) );
     550           0 :   fd_ed25519_public_from_private( ctx->tls_pub_key, ctx->tls_priv_key, sha512 );
     551           0 :   fd_sha512_leave( sha512 );
     552             : 
     553           0 :   fd_aio_t * quic_tx_aio = fd_aio_join( fd_aio_new( ctx->quic_tx_aio, ctx, quic_tx_aio_send ) );
     554           0 :   if( FD_UNLIKELY( !quic_tx_aio ) ) FD_LOG_ERR(( "fd_aio_join failed" ));
     555             : 
     556           0 :   fd_quic_limits_t limits = quic_limits( tile );
     557           0 :   fd_quic_t * quic = fd_quic_join( fd_quic_new( FD_SCRATCH_ALLOC_APPEND( l, fd_quic_align(), fd_quic_footprint( &limits ) ), &limits ) );
     558           0 :   if( FD_UNLIKELY( !quic ) ) FD_LOG_ERR(( "fd_quic_join failed" ));
     559             : 
     560           0 :   if( FD_UNLIKELY( tile->quic.ack_delay_millis == 0 ) ) {
     561           0 :     FD_LOG_ERR(( "Invalid `ack_delay_millis`: must be greater than zero" ));
     562           0 :   }
     563           0 :   if( FD_UNLIKELY( tile->quic.ack_delay_millis >= tile->quic.idle_timeout_millis ) ) {
     564           0 :     FD_LOG_ERR(( "Invalid `ack_delay_millis`: must be lower than `idle_timeout_millis`" ));
     565           0 :   }
     566             : 
                     :   /* The two timeouts below are the millisecond settings scaled by
                     :      1e6, i.e. expressed in nanoseconds. */
     567           0 :   quic->config.role                       = FD_QUIC_ROLE_SERVER;
     568           0 :   quic->config.net.ip_addr                = tile->quic.ip_addr;
     569           0 :   quic->config.net.listen_udp_port        = tile->quic.quic_transaction_listen_port;
     570           0 :   quic->config.idle_timeout               = tile->quic.idle_timeout_millis * 1000000UL;
     571           0 :   quic->config.ack_delay                  = tile->quic.ack_delay_millis * 1000000UL;
     572           0 :   quic->config.initial_rx_max_stream_data = FD_TXN_MTU;
     573           0 :   quic->config.retry                      = tile->quic.retry;
     574           0 :   fd_memcpy( quic->config.link.src_mac_addr, tile->quic.src_mac_addr, 6 );
     575           0 :   fd_memcpy( quic->config.identity_public_key, ctx->tls_pub_key, ED25519_PUB_KEY_SZ );
     576             : 
     577           0 :   quic->config.sign         = quic_tls_cv_sign;
     578           0 :   quic->config.sign_ctx     = ctx;
     579             : 
     580           0 :   quic->cb.conn_new         = quic_conn_new;
     581           0 :   quic->cb.conn_hs_complete = NULL;
     582           0 :   quic->cb.conn_final       = NULL;
     583           0 :   quic->cb.stream_new       = quic_stream_new;
     584           0 :   quic->cb.stream_receive   = quic_stream_receive;
     585           0 :   quic->cb.stream_notify    = quic_stream_notify;
     586           0 :   quic->cb.now              = quic_now;
     587           0 :   quic->cb.now_ctx          = NULL;
     588           0 :   quic->cb.quic_ctx         = ctx;
     589             : 
     590           0 :   fd_quic_set_aio_net_tx( quic, quic_tx_aio );
     591           0 :   if( FD_UNLIKELY( !fd_quic_init( quic ) ) ) FD_LOG_ERR(( "fd_quic_init failed" ));
     592             : 
     593             :   /* Put a bound on chunks we read from the input, to make sure they
     594             :      are within the data region of the workspace. */
     595           0 :   fd_topo_link_t * link0 = &topo->links[ tile->in_link_id[ 0 ] ];
     596             : 
                     :   /* Every polled input link must share link0's workspace and MTU,
                     :      because a single in_mem / in_chunk0 / in_wmark triple is kept
                     :      for all inputs. */
     597           0 :   for( ulong i=1UL; i<tile->in_cnt; i++ ) {
     598           0 :     fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
     599             : 
     600           0 :     if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
     601             : 
     602           0 :     if( FD_UNLIKELY( topo->objs[ link0->dcache_obj_id ].wksp_id!=topo->objs[ link->dcache_obj_id ].wksp_id ) ) FD_LOG_ERR(( "quic tile reads input from multiple workspaces" ));
     603           0 :     if( FD_UNLIKELY( link0->mtu!=link->mtu         ) ) FD_LOG_ERR(( "quic tile reads input from multiple links with different MTUs" ));
     604           0 :   }
     605             : 
     606           0 :   ctx->in_mem    = topo->workspaces[ topo->objs[ link0->dcache_obj_id ].wksp_id ].wksp;
     607           0 :   ctx->in_chunk0 = fd_disco_compact_chunk0( ctx->in_mem );
     608           0 :   ctx->in_wmark  = fd_disco_compact_wmark ( ctx->in_mem, link0->mtu );
     609             : 
                     :   /* Output link 1 ("quic_net"): producer state for outgoing packets. */
     610           0 :   fd_topo_link_t * net_out = &topo->links[ tile->out_link_id[ 1 ] ];
     611             : 
     612           0 :   ctx->net_out_mcache = net_out->mcache;
     613           0 :   ctx->net_out_sync   = fd_mcache_seq_laddr( ctx->net_out_mcache );
     614           0 :   ctx->net_out_depth  = fd_mcache_depth( ctx->net_out_mcache );
     615           0 :   ctx->net_out_seq    = fd_mcache_seq_query( ctx->net_out_sync );
     616           0 :   ctx->net_out_chunk0 = fd_dcache_compact_chunk0( fd_wksp_containing( net_out->dcache ), net_out->dcache );
     617           0 :   ctx->net_out_mem    = topo->workspaces[ topo->objs[ net_out->dcache_obj_id ].wksp_id ].wksp;
     618           0 :   ctx->net_out_wmark  = fd_dcache_compact_wmark ( ctx->net_out_mem, net_out->dcache, net_out->mtu );
     619           0 :   ctx->net_out_chunk  = ctx->net_out_chunk0;
     620             : 
                     :   /* Output link 0 ("quic_verify"): supplies the reassembly object. */
     621           0 :   fd_topo_link_t * verify_out = &topo->links[ tile->out_link_id[ 0 ] ];
     622             : 
     623           0 :   ctx->verify_out_mem = topo->workspaces[ topo->objs[ verify_out->reasm_obj_id ].wksp_id ].wksp;
     624             : 
     625           0 :   ctx->reasm = verify_out->reasm;
     626           0 :   if( FD_UNLIKELY( !ctx->reasm ) )
     627           0 :     FD_LOG_ERR(( "invalid tpu_reasm parameters" ));
     628             : 
     629           0 :   ctx->conn_seq    = 0UL;
     630             : 
     631           0 :   ctx->quic        = quic;
     632           0 :   ctx->quic_rx_aio = fd_quic_get_aio_net_rx( quic );
     633             : 
     634           0 :   ctx->round_robin_cnt = fd_topo_tile_name_cnt( topo, tile->name );
     635           0 :   ctx->round_robin_id  = tile->kind_id;
     636             : 
                     :   /* Verify the allocations above stayed inside the scratch region
                     :      sized by scratch_footprint. */
     637           0 :   ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
     638           0 :   if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
     639           0 :     FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
     640           0 : }
     641             : /* populate_allowed_seccomp fills out (out_cnt entries available)
                     :    by calling populate_sock_filter_policy_quic with the log file
                     :    descriptor, and returns sock_filter_policy_quic_instr_cnt.  topo
                     :    and tile are unused. */
     642             : static ulong
     643             : populate_allowed_seccomp( fd_topo_t const *      topo,
     644             :                           fd_topo_tile_t const * tile,
     645             :                           ulong                  out_cnt,
     646           0 :                           struct sock_filter *   out ) {
     647           0 :   (void)topo;
     648           0 :   (void)tile;
     649             : 
                     :   uint logfile_fd = (uint)fd_log_private_logfile_fd();
     650           0 :   populate_sock_filter_policy_quic( out_cnt, out, logfile_fd );
     651           0 :   return sock_filter_policy_quic_instr_cnt;
     652           0 : }
     653             : /* populate_allowed_fds writes the file descriptors this tile keeps
                     :    into out_fds and returns how many were written: always stderr
                     :    (2), plus the log file descriptor when one is set (not -1).
                     :    out_fds_cnt is the capacity of out_fds and must be at least 2.
                     :    topo and tile are unused. */
     654             : static ulong
     655             : populate_allowed_fds( fd_topo_t const *      topo,
     656             :                       fd_topo_tile_t const * tile,
     657             :                       ulong                  out_fds_cnt,
     658           0 :                       int *                  out_fds ) {
     659           0 :   (void)topo;
     660           0 :   (void)tile;
     661             : 
     662           0 :   if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
     663             : 
     664           0 :   ulong fd_cnt = 0UL;
     665           0 :   out_fds[ fd_cnt++ ] = 2; /* stderr */
     666           0 :   if( FD_LIKELY( fd_log_private_logfile_fd()!=-1 ) )
     667           0 :     out_fds[ fd_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
     668           0 :   return fd_cnt;
     669           0 : }
     670             : /* Configuration for the stem template included at the end of this
                     :    section.  These macros must be defined before the #include.
                     :    NOTE(review): exact meaning of STEM_BURST is defined by
                     :    fd_stem.c, not visible here -- confirm before changing. */
     671           0 : #define STEM_BURST (1UL)
     672             : /* Type and alignment of the context handed to the callbacks. */
     673           0 : #define STEM_CALLBACK_CONTEXT_TYPE  fd_quic_ctx_t
     674           0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_quic_ctx_t)
     675             : /* Map each stem callback slot to this file's function of that name. */
     676           0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
     677           0 : #define STEM_CALLBACK_METRICS_WRITE       metrics_write
     678           0 : #define STEM_CALLBACK_BEFORE_CREDIT       before_credit
     679           0 : #define STEM_CALLBACK_BEFORE_FRAG         before_frag
     680           0 : #define STEM_CALLBACK_DURING_FRAG         during_frag
     681           0 : #define STEM_CALLBACK_AFTER_FRAG          after_frag
     682             : /* Presumably provides stem_run, referenced by fd_tile_quic below. */
     683             : #include "../../../../disco/stem/fd_stem.c"
     684             : /* fd_tile_quic is the run-table entry for the tile named "quic".
                     :    It binds the sandbox, scratch and init callbacks defined in this
                     :    file; .run is stem_run, which is not defined in the visible part
                     :    of this file (presumably it comes from the fd_stem.c include). */
     685             : fd_topo_run_tile_t fd_tile_quic = {
     686             :   .name                     = "quic",
     687             :   .populate_allowed_seccomp = populate_allowed_seccomp,
     688             :   .populate_allowed_fds     = populate_allowed_fds,
     689             :   .scratch_align            = scratch_align,
     690             :   .scratch_footprint        = scratch_footprint,
     691             :   .privileged_init          = privileged_init,
     692             :   .unprivileged_init        = unprivileged_init,
     693             :   .run                      = stem_run,
     694             : };

Generated by: LCOV version 1.14