LCOV - code coverage report
Current view: top level - disco/quic - fd_quic_tile.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 399 0.0 %
Date: 2025-07-01 05:00:49 Functions: 0 19 0.0 %

          Line data    Source code
       1             : #include "fd_quic_tile.h"
       2             : #include "../metrics/fd_metrics.h"
       3             : #include "../stem/fd_stem.h"
       4             : #include "../topo/fd_topo.h"
       5             : #include "fd_tpu.h"
       6             : #include "../../waltz/quic/fd_quic_private.h"
       7             : #include "generated/quic_seccomp.h"
       8             : #include "../../util/net/fd_eth.h"
       9             : 
      10             : #include <errno.h>
      11             : #include <linux/unistd.h>
      12             : #include <sys/random.h>
      13             : 
      14             : /* fd_quic provides a TPU server tile.
      15             : 
      16             :    This tile handles incoming transactions that clients request to be
      17             :    included in blocks.  Supported protocols currently include TPU/UDP
      18             :    and TPU/QUIC.
      19             : 
      20             :    The fd_quic tile acts as a plain old Tango producer writing to a cnc
      21             :    and an mcache.  The tile will defragment multi-packet TPU streams
      22             :    coming in from QUIC, such that each mcache/dcache pair forms a
      23             :    complete txn.  This requires the dcache mtu to be at least that of
      24             :    the largest allowed serialized txn size.
      25             : 
      26             :    QUIC tiles don't service network devices directly, but rely on
      27             :    packets being received by net tiles and forwarded on via a mux
      28             :    (multiplexer).  An arbitrary number of QUIC tiles can be run.  Each
      29             :    UDP flow must stick to one QUIC tile. */
      30             : 
      31             : static inline fd_quic_limits_t
      32           0 : quic_limits( fd_topo_tile_t const * tile ) {
      33           0 :   fd_quic_limits_t limits = {
      34           0 :     .conn_cnt      = tile->quic.max_concurrent_connections,
      35           0 :     .handshake_cnt = tile->quic.max_concurrent_handshakes,
      36             : 
      37             :     /* fd_quic will not issue nor use any new connection IDs after
      38             :        completing a handshake.  Connection migration is not supported
      39             :        either. */
      40           0 :     .conn_id_cnt                 = FD_QUIC_MIN_CONN_ID_CNT,
      41           0 :     .inflight_frame_cnt          = 64UL * tile->quic.max_concurrent_connections,
      42           0 :     .min_inflight_frame_cnt_conn = 32UL
      43           0 :   };
      44           0 :   if( FD_UNLIKELY( !fd_quic_footprint( &limits ) ) ) {
      45           0 :     FD_LOG_ERR(( "Invalid QUIC limits in config" ));
      46           0 :   }
      47           0 :   return limits;
      48           0 : }
      49             : 
      50             : FD_FN_CONST static inline ulong
      51           0 : scratch_align( void ) {
      52           0 :   return fd_ulong_max( alignof(fd_quic_ctx_t), fd_quic_align() );
      53           0 : }
      54             : 
      55             : static inline ulong
      56           0 : scratch_footprint( fd_topo_tile_t const * tile ) {
      57           0 :   ulong out_depth = tile->quic.out_depth;
      58           0 :   ulong reasm_max = tile->quic.reasm_cnt;
      59             : 
      60           0 :   fd_quic_limits_t limits = quic_limits( tile ); /* May FD_LOG_ERR */
      61           0 :   ulong            l      = FD_LAYOUT_INIT;
      62           0 :   l = FD_LAYOUT_APPEND( l, alignof( fd_quic_ctx_t ), sizeof( fd_quic_ctx_t )                        );
      63           0 :   l = FD_LAYOUT_APPEND( l, fd_quic_align(),          fd_quic_footprint( &limits )                   );
      64           0 :   l = FD_LAYOUT_APPEND( l, fd_tpu_reasm_align(),     fd_tpu_reasm_footprint( out_depth, reasm_max ) );
      65           0 :   return FD_LAYOUT_FINI( l, scratch_align() );
      66           0 : }
      67             : 
      68             : /* legacy_stream_notify is called for transactions sent via TPU/UDP. For
      69             :    now both QUIC and non-QUIC transactions are accepted, with traffic
      70             :    type determined by port.
      71             : 
      72             :    UDP transactions must fit in one packet and cannot be fragmented, and
      73             :    notify here means the entire packet was received. */
      74             : 
      75             : static void
      76             : legacy_stream_notify( fd_quic_ctx_t * ctx,
      77             :                       uchar *         packet,
      78           0 :                       ulong           packet_sz ) {
      79             : 
      80           0 :   long                tspub    = fd_tickcount();
      81           0 :   fd_tpu_reasm_t *    reasm    = ctx->reasm;
      82           0 :   fd_stem_context_t * stem     = ctx->stem;
      83           0 :   fd_frag_meta_t *    mcache   = stem->mcaches[0];
      84           0 :   void *              base     = ctx->verify_out_mem;
      85           0 :   ulong               seq      = stem->seqs[0];
      86             : 
      87           0 :   int err = fd_tpu_reasm_publish_fast( reasm, packet, packet_sz, mcache, base, seq, tspub );
      88           0 :   if( FD_LIKELY( err==FD_TPU_REASM_SUCCESS ) ) {
      89           0 :     fd_stem_advance( stem, 0UL );
      90           0 :     ctx->metrics.txns_received_udp++;
      91           0 :   }
      92           0 : }
      93             : 
      94             : /* Because of the separate mcache for publishing network fragments
      95             :    back to networking tiles, which is not managed by the mux, we
      96             :    need to periodically update the sync. */
      97             : static void
      98           0 : during_housekeeping( fd_quic_ctx_t * ctx ) {
      99           0 :   fd_mcache_seq_update( ctx->net_out_sync, ctx->net_out_seq );
     100           0 : }
     101             : 
     102             : /* This tile always publishes messages downstream, even if there are
     103             :    no credits available.  It ignores the flow control of the downstream
     104             :    verify tile.  This is OK as the verify tile is written to expect
     105             :    this behavior, and enables the QUIC tile to publish as fast as it
     106             :    can.  It would currently be difficult to backpressure further
     107             :    up the stack to the network itself. */
     108             : static inline void
     109             : before_credit( fd_quic_ctx_t *     ctx,
     110             :                fd_stem_context_t * stem,
     111           0 :                int *               charge_busy ) {
     112           0 :   ctx->stem = stem;
     113             : 
     114             :   /* Publishes to mcache via callbacks */
     115           0 :   *charge_busy = fd_quic_service( ctx->quic );
     116           0 : }
     117             : 
     118             : static inline void
     119           0 : metrics_write( fd_quic_ctx_t * ctx ) {
     120           0 :   FD_MCNT_SET  ( QUIC, TXNS_RECEIVED_UDP,       ctx->metrics.txns_received_udp );
     121           0 :   FD_MCNT_SET  ( QUIC, TXNS_RECEIVED_QUIC_FAST, ctx->metrics.txns_received_quic_fast );
     122           0 :   FD_MCNT_SET  ( QUIC, TXNS_RECEIVED_QUIC_FRAG, ctx->metrics.txns_received_quic_frag );
     123           0 :   FD_MCNT_SET  ( QUIC, FRAGS_OK,                ctx->metrics.frag_ok_cnt );
     124           0 :   FD_MCNT_SET  ( QUIC, FRAGS_GAP,               ctx->metrics.frag_gap_cnt );
     125           0 :   FD_MCNT_SET  ( QUIC, FRAGS_DUP,               ctx->metrics.frag_dup_cnt );
     126           0 :   FD_MCNT_SET  ( QUIC, TXNS_OVERRUN,            ctx->metrics.reasm_overrun );
     127           0 :   FD_MCNT_SET  ( QUIC, TXNS_ABANDONED,          ctx->metrics.reasm_abandoned );
     128           0 :   FD_MCNT_SET  ( QUIC, TXN_REASMS_STARTED,      ctx->metrics.reasm_started );
     129           0 :   FD_MGAUGE_SET( QUIC, TXN_REASMS_ACTIVE,       (ulong)fd_long_max( ctx->metrics.reasm_active, 0L ) );
     130             : 
     131           0 :   FD_MCNT_SET( QUIC, LEGACY_TXN_UNDERSZ, ctx->metrics.udp_pkt_too_small );
     132           0 :   FD_MCNT_SET( QUIC, LEGACY_TXN_OVERSZ,  ctx->metrics.udp_pkt_too_large );
     133           0 :   FD_MCNT_SET( QUIC, TXN_UNDERSZ,        ctx->metrics.quic_txn_too_small );
     134           0 :   FD_MCNT_SET( QUIC, TXN_OVERSZ,         ctx->metrics.quic_txn_too_large );
     135             : 
     136           0 :   FD_MCNT_SET(   QUIC, RECEIVED_PACKETS, ctx->quic->metrics.net_rx_pkt_cnt );
     137           0 :   FD_MCNT_SET(   QUIC, RECEIVED_BYTES,   ctx->quic->metrics.net_rx_byte_cnt );
     138           0 :   FD_MCNT_SET(   QUIC, SENT_PACKETS,     ctx->quic->metrics.net_tx_pkt_cnt );
     139           0 :   FD_MCNT_SET(   QUIC, SENT_BYTES,       ctx->quic->metrics.net_tx_byte_cnt );
     140           0 :   FD_MCNT_SET(   QUIC, RETRY_SENT,       ctx->quic->metrics.retry_tx_cnt );
     141             : 
     142           0 :   FD_MGAUGE_SET( QUIC, CONNECTIONS_ACTIVE,  ctx->quic->metrics.conn_active_cnt );
     143           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_CREATED, ctx->quic->metrics.conn_created_cnt );
     144           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_CLOSED,  ctx->quic->metrics.conn_closed_cnt );
     145           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_ABORTED, ctx->quic->metrics.conn_aborted_cnt );
     146           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_TIMED_OUT, ctx->quic->metrics.conn_timeout_cnt );
     147           0 :   FD_MCNT_SET(   QUIC, CONNECTIONS_RETRIED, ctx->quic->metrics.conn_retry_cnt );
     148             : 
     149           0 :   FD_MCNT_SET(   QUIC, CONNECTION_ERROR_NO_SLOTS,   ctx->quic->metrics.conn_err_no_slots_cnt );
     150           0 :   FD_MCNT_SET(   QUIC, CONNECTION_ERROR_RETRY_FAIL, ctx->quic->metrics.conn_err_retry_fail_cnt );
     151             : 
     152           0 :   FD_MCNT_ENUM_COPY( QUIC, PKT_CRYPTO_FAILED,   ctx->quic->metrics.pkt_decrypt_fail_cnt );
     153           0 :   FD_MCNT_ENUM_COPY( QUIC, PKT_NO_KEY,          ctx->quic->metrics.pkt_no_key_cnt );
     154           0 :   FD_MCNT_SET(       QUIC, PKT_NO_CONN,         ctx->quic->metrics.pkt_no_conn_cnt );
     155           0 :   FD_MCNT_ENUM_COPY( QUIC, FRAME_TX_ALLOC,        ctx->quic->metrics.frame_tx_alloc_cnt );
     156           0 :   FD_MCNT_SET(       QUIC, PKT_NET_HEADER_INVALID,  ctx->quic->metrics.pkt_net_hdr_err_cnt );
     157           0 :   FD_MCNT_SET(       QUIC, PKT_QUIC_HEADER_INVALID, ctx->quic->metrics.pkt_quic_hdr_err_cnt );
     158           0 :   FD_MCNT_SET(       QUIC, PKT_UNDERSZ,         ctx->quic->metrics.pkt_undersz_cnt );
     159           0 :   FD_MCNT_SET(       QUIC, PKT_OVERSZ,          ctx->quic->metrics.pkt_oversz_cnt );
     160           0 :   FD_MCNT_SET(       QUIC, PKT_VERNEG,          ctx->quic->metrics.pkt_verneg_cnt );
     161           0 :   FD_MCNT_SET(       QUIC, PKT_RETRANSMISSIONS, ctx->quic->metrics.pkt_retransmissions_cnt );
     162             : 
     163           0 :   FD_MCNT_SET(   QUIC, HANDSHAKES_CREATED,         ctx->quic->metrics.hs_created_cnt );
     164           0 :   FD_MCNT_SET(   QUIC, HANDSHAKE_ERROR_ALLOC_FAIL, ctx->quic->metrics.hs_err_alloc_fail_cnt );
     165           0 :   FD_MCNT_SET(   QUIC, HANDSHAKE_EVICTED,          ctx->quic->metrics.hs_evicted_cnt );
     166             : 
     167           0 :   FD_MCNT_SET(  QUIC, STREAM_RECEIVED_EVENTS, ctx->quic->metrics.stream_rx_event_cnt );
     168           0 :   FD_MCNT_SET(  QUIC, STREAM_RECEIVED_BYTES,  ctx->quic->metrics.stream_rx_byte_cnt );
     169             : 
     170           0 :   FD_MCNT_ENUM_COPY( QUIC, RECEIVED_FRAMES,  ctx->quic->metrics.frame_rx_cnt );
     171           0 :   FD_MCNT_SET      ( QUIC, FRAME_FAIL_PARSE, ctx->quic->metrics.frame_rx_err_cnt );
     172             : 
     173           0 :   FD_MCNT_ENUM_COPY( QUIC, ACK_TX, ctx->quic->metrics.ack_tx );
     174             : 
     175           0 :   FD_MHIST_COPY( QUIC, SERVICE_DURATION_SECONDS, ctx->quic->metrics.service_duration );
     176           0 :   FD_MHIST_COPY( QUIC, RECEIVE_DURATION_SECONDS, ctx->quic->metrics.receive_duration );
     177           0 : }
     178             : 
     179             : static int
     180             : before_frag( fd_quic_ctx_t * ctx,
     181             :              ulong           in_idx,
     182             :              ulong           seq,
     183           0 :              ulong           sig ) {
     184           0 :   (void)in_idx;
     185           0 :   (void)seq;
     186             : 
     187           0 :   ulong proto = fd_disco_netmux_sig_proto( sig );
     188           0 :   if( FD_UNLIKELY( proto!=DST_PROTO_TPU_UDP && proto!=DST_PROTO_TPU_QUIC ) ) return 1;
     189             : 
     190           0 :   ulong hash = fd_disco_netmux_sig_hash( sig );
     191           0 :   if( FD_UNLIKELY( (hash % ctx->round_robin_cnt) != ctx->round_robin_id ) ) return 1;
     192             : 
     193           0 :   return 0;
     194           0 : }
     195             : 
     196             : static void
     197             : during_frag( fd_quic_ctx_t * ctx,
     198             :              ulong           in_idx,
     199             :              ulong           seq    FD_PARAM_UNUSED,
     200             :              ulong           sig    FD_PARAM_UNUSED,
     201             :              ulong           chunk,
     202             :              ulong           sz,
     203           0 :              ulong           ctl ) {
     204           0 :   void const * src = fd_net_rx_translate_frag( &ctx->net_in_bounds[ in_idx ], chunk, ctl, sz );
     205             : 
     206             :   /* FIXME this copy could be eliminated by combining it with the decrypt operation */
     207           0 :   fd_memcpy( ctx->buffer, src, sz );
     208           0 : }
     209             : 
     210             : static void
     211             : after_frag( fd_quic_ctx_t *     ctx,
     212             :             ulong               in_idx,
     213             :             ulong               seq,
     214             :             ulong               sig,
     215             :             ulong               sz,
     216             :             ulong               tsorig,
     217             :             ulong               tspub,
     218           0 :             fd_stem_context_t * stem ) {
     219           0 :   (void)in_idx;
     220           0 :   (void)seq;
     221           0 :   (void)tsorig;
     222           0 :   (void)tspub;
     223           0 :   (void)stem;
     224             : 
     225           0 :   ulong proto = fd_disco_netmux_sig_proto( sig );
     226             : 
     227           0 :   if( FD_LIKELY( proto==DST_PROTO_TPU_QUIC ) ) {
     228           0 :     if( FD_UNLIKELY( sz<sizeof(fd_eth_hdr_t) ) ) FD_LOG_ERR(( "QUIC packet too small" ));
     229           0 :     uchar * ip_pkt = ctx->buffer + sizeof(fd_eth_hdr_t);
     230           0 :     ulong   ip_sz  = sz - sizeof(fd_eth_hdr_t);
     231             : 
     232           0 :     fd_quic_t * quic = ctx->quic;
     233           0 :     long dt = -fd_tickcount();
     234           0 :     fd_quic_process_packet( quic, ip_pkt, ip_sz );
     235           0 :     dt += fd_tickcount();
     236           0 :     fd_histf_sample( quic->metrics.receive_duration, (ulong)dt );
     237           0 :     quic->metrics.net_rx_byte_cnt += sz;
     238           0 :     quic->metrics.net_rx_pkt_cnt++;
     239           0 :   } else if( FD_LIKELY( proto==DST_PROTO_TPU_UDP ) ) {
     240           0 :     ulong network_hdr_sz = fd_disco_netmux_sig_hdr_sz( sig );
     241           0 :     if( FD_UNLIKELY( sz<=network_hdr_sz ) ) {
     242             :       /* Transaction not valid if the packet isn't large enough for the network
     243             :          headers. */
     244           0 :       ctx->metrics.udp_pkt_too_small++;
     245           0 :       return;
     246           0 :     }
     247             : 
     248           0 :     ulong data_sz = sz - network_hdr_sz;
     249           0 :     if( FD_UNLIKELY( data_sz<FD_TXN_MIN_SERIALIZED_SZ ) ) {
     250             :       /* Smaller than the smallest possible transaction */
     251           0 :       ctx->metrics.udp_pkt_too_small++;
     252           0 :       return;
     253           0 :     }
     254             : 
     255           0 :     if( FD_UNLIKELY( data_sz>FD_TPU_MTU ) ) {
     256             :       /* Transaction couldn't possibly be valid if it's longer than transaction
     257             :          MTU so drop it. This is not required, as the txn will fail to parse,
     258             :          but it's a nice short circuit. */
     259           0 :       ctx->metrics.udp_pkt_too_large++;
     260           0 :       return;
     261           0 :     }
     262             : 
     263           0 :     legacy_stream_notify( ctx, ctx->buffer+network_hdr_sz, data_sz );
     264           0 :   }
     265           0 : }
     266             : 
     267             : static ulong
     268           0 : quic_now( void * ctx FD_PARAM_UNUSED ) {
     269           0 :   return (ulong)fd_tickcount();
     270           0 : }
     271             : 
     272             : static void
     273             : quic_conn_final( fd_quic_conn_t * conn,
     274           0 :                  void *           quic_ctx ) {
     275           0 :   fd_quic_ctx_t * ctx = quic_ctx;
     276           0 :   long abandon_cnt = fd_long_max( conn->srx->rx_streams_active, 0L );
     277           0 :   ctx->metrics.reasm_active    -= abandon_cnt;
     278           0 :   ctx->metrics.reasm_abandoned += (ulong)abandon_cnt;
     279           0 : }
     280             : 
     281             : static int
     282             : quic_stream_rx( fd_quic_conn_t * conn,
     283             :                 ulong            stream_id,
     284             :                 ulong            offset,
     285             :                 uchar const *    data,
     286             :                 ulong            data_sz,
     287           0 :                 int              fin ) {
     288             : 
     289           0 :   long                tspub    = fd_tickcount();
     290           0 :   fd_quic_t *         quic     = conn->quic;
     291           0 :   fd_quic_state_t *   state    = fd_quic_get_state( quic );  /* ugly */
     292           0 :   fd_quic_ctx_t *     ctx      = quic->cb.quic_ctx;
     293           0 :   fd_tpu_reasm_t *    reasm    = ctx->reasm;
     294           0 :   ulong               conn_uid = fd_quic_conn_uid( conn );
     295           0 :   fd_stem_context_t * stem     = ctx->stem;
     296           0 :   fd_frag_meta_t *    mcache   = stem->mcaches[0];
     297           0 :   void *              base     = ctx->verify_out_mem;
     298           0 :   ulong               seq      = stem->seqs[0];
     299             : 
     300           0 :   int oversz = offset+data_sz > FD_TPU_MTU;
     301             : 
     302           0 :   if( offset==0UL && fin ) {
     303             :     /* Fast path */
     304           0 :     if( FD_UNLIKELY( data_sz<FD_TXN_MIN_SERIALIZED_SZ ) ) {
     305           0 :       ctx->metrics.quic_txn_too_small++;
     306           0 :       return FD_QUIC_SUCCESS; /* drop */
     307           0 :     }
     308           0 :     if( FD_UNLIKELY( oversz ) ) {
     309           0 :       ctx->metrics.quic_txn_too_large++;
     310           0 :       return FD_QUIC_SUCCESS; /* drop */
     311           0 :     }
     312           0 :     int err = fd_tpu_reasm_publish_fast( reasm, data, data_sz, mcache, base, seq, tspub );
     313           0 :     if( FD_LIKELY( err==FD_TPU_REASM_SUCCESS ) ) {
     314           0 :       fd_stem_advance( stem, 0UL );
     315           0 :       ctx->metrics.txns_received_quic_fast++;
     316           0 :     }
     317           0 :     return FD_QUIC_SUCCESS;
     318           0 :   }
     319             : 
     320           0 :   if( data_sz==0UL && !fin ) return FD_QUIC_SUCCESS; /* noop */
     321             : 
     322           0 :   fd_tpu_reasm_slot_t * slot = fd_tpu_reasm_query( reasm, conn_uid, stream_id );
     323             : 
     324           0 :   if( !slot ) { /* start a new reassembly */
     325           0 :     if( offset>0 ) {
     326           0 :       ctx->metrics.frag_gap_cnt++;
     327           0 :       return FD_QUIC_SUCCESS;
     328           0 :     }
     329           0 :     if( data_sz==0 ) return FD_QUIC_SUCCESS; /* ignore empty */
     330           0 :     if( FD_UNLIKELY( oversz ) ) {
     331           0 :       ctx->metrics.quic_txn_too_large++;
     332           0 :       return FD_QUIC_SUCCESS; /* drop */
     333           0 :     }
     334             : 
     335             :     /* Was the reasm buffer we evicted busy? */
     336           0 :     fd_tpu_reasm_slot_t * victim      = fd_tpu_reasm_peek_tail( reasm );
     337           0 :     int                   victim_busy = victim->k.state == FD_TPU_REASM_STATE_BUSY;
     338             : 
     339             :     /* If so, does the connection it refers to still exist?
     340             :        (Or was the buffer previously abandoned by means of conn close) */
     341           0 :     uint             victim_cidx   = fd_quic_conn_uid_idx( victim->k.conn_uid );
     342           0 :     uint             victim_gen    = fd_quic_conn_uid_gen( victim->k.conn_uid );
     343           0 :     fd_quic_conn_t * victim_conn   = fd_quic_conn_at_idx( state, victim_cidx ); /* possibly oob */
     344           0 :     if( victim_busy ) {
     345           0 :       uint victim_exists = (victim_conn->conn_gen == victim_gen) &
     346           0 :                            (victim_conn->state == FD_QUIC_CONN_STATE_ACTIVE); /* in [0,1] */
     347           0 :       victim_conn->srx->rx_streams_active -= victim_exists;
     348           0 :       ctx->metrics.reasm_overrun          += victim_exists;
     349           0 :       ctx->metrics.reasm_active           -= victim_exists;
     350           0 :     }
     351             : 
     352           0 :     slot = fd_tpu_reasm_prepare( reasm, conn_uid, stream_id, tspub ); /* infallible */
     353           0 :     ctx->metrics.reasm_started++;
     354           0 :     ctx->metrics.reasm_active++;
     355           0 :     conn->srx->rx_streams_active++;
     356           0 :   } else if( slot->k.state != FD_TPU_REASM_STATE_BUSY ) {
     357           0 :     ctx->metrics.frag_dup_cnt++;
     358           0 :     return FD_QUIC_SUCCESS;
     359           0 :   }
     360             : 
     361           0 :   int reasm_res = fd_tpu_reasm_frag( reasm, slot, data, data_sz, offset );
     362           0 :   if( FD_UNLIKELY( reasm_res != FD_TPU_REASM_SUCCESS ) ) {
     363           0 :     int is_gap    = reasm_res==FD_TPU_REASM_ERR_SKIP;
     364           0 :     int is_oversz = reasm_res==FD_TPU_REASM_ERR_SZ;
     365           0 :     ctx->metrics.frag_gap_cnt       += (ulong)is_gap;
     366           0 :     ctx->metrics.quic_txn_too_large += (ulong)is_oversz;
     367           0 :     return is_gap ? FD_QUIC_FAILED : FD_QUIC_SUCCESS;
     368           0 :   }
     369           0 :   ctx->metrics.frag_ok_cnt++;
     370             : 
     371           0 :   if( fin ) {
     372           0 :     if( FD_UNLIKELY( slot->k.sz < FD_TXN_MIN_SERIALIZED_SZ ) ) {
     373           0 :       ctx->metrics.quic_txn_too_small++;
     374           0 :       return FD_QUIC_SUCCESS; /* ignore */
     375           0 :     }
     376           0 :     int pub_err = fd_tpu_reasm_publish( reasm, slot, mcache, base, seq, tspub );
     377           0 :     if( FD_UNLIKELY( pub_err!=FD_TPU_REASM_SUCCESS ) ) return FD_QUIC_SUCCESS; /* unreachable */
     378           0 :     ulong * rcv_cnt = (offset==0UL && fin) ? &ctx->metrics.txns_received_quic_fast : &ctx->metrics.txns_received_quic_frag;
     379           0 :     (*rcv_cnt)++;
     380           0 :     ctx->metrics.reasm_active--;
     381           0 :     conn->srx->rx_streams_active--;
     382             : 
     383           0 :     fd_stem_advance( stem, 0UL );
     384           0 :   }
     385             : 
     386           0 :   return FD_QUIC_SUCCESS;
     387           0 : }
     388             : 
     389             : static int
     390             : quic_tx_aio_send( void *                    _ctx,
     391             :                   fd_aio_pkt_info_t const * batch,
     392             :                   ulong                     batch_cnt,
     393             :                   ulong *                   opt_batch_idx,
     394           0 :                   int                       flush ) {
     395           0 :   (void)flush;
     396             : 
     397           0 :   fd_quic_ctx_t * ctx = _ctx;
     398             : 
     399           0 :   for( ulong i=0; i<batch_cnt; i++ ) {
     400           0 :     if( FD_UNLIKELY( batch[ i ].buf_sz<FD_NETMUX_SIG_MIN_HDR_SZ ) ) continue;
     401             : 
     402           0 :     uint const ip_dst = FD_LOAD( uint, batch[ i ].buf+offsetof( fd_ip4_hdr_t, daddr_c ) );
     403           0 :     uchar * packet_l2 = fd_chunk_to_laddr( ctx->net_out_mem, ctx->net_out_chunk );
     404           0 :     uchar * packet_l3 = packet_l2 + sizeof(fd_eth_hdr_t);
     405           0 :     memset( packet_l2, 0, 12 );
     406           0 :     FD_STORE( ushort, packet_l2+offsetof( fd_eth_hdr_t, net_type ), fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) );
     407           0 :     fd_memcpy( packet_l3, batch[ i ].buf, batch[ i ].buf_sz );
     408           0 :     ulong sz_l2 = sizeof(fd_eth_hdr_t) + batch[ i ].buf_sz;
     409             : 
     410             :     /* send packets are just round-robined by sequence number, so for now
     411             :        just indicate where they came from so they don't bounce back */
     412           0 :     ulong sig = fd_disco_netmux_sig( ip_dst, 0U, ip_dst, DST_PROTO_OUTGOING, FD_NETMUX_SIG_MIN_HDR_SZ );
     413             : 
     414           0 :     long tspub = fd_tickcount();
     415           0 :     fd_mcache_publish( ctx->net_out_mcache,
     416           0 :                        ctx->net_out_depth,
     417           0 :                        ctx->net_out_seq,
     418           0 :                        sig,
     419           0 :                        ctx->net_out_chunk,
     420           0 :                        sz_l2,
     421           0 :                        fd_frag_meta_ctl( 0UL, 1, 1, 0 ),
     422           0 :                        0,
     423           0 :                        fd_frag_meta_ts_comp( tspub ) );
     424             : 
     425           0 :     ctx->net_out_seq   = fd_seq_inc( ctx->net_out_seq, 1UL );
     426           0 :     ctx->net_out_chunk = fd_dcache_compact_next( ctx->net_out_chunk, FD_NET_MTU, ctx->net_out_chunk0, ctx->net_out_wmark );
     427           0 :   }
     428             : 
     429           0 :   if( FD_LIKELY( opt_batch_idx ) ) {
     430           0 :     *opt_batch_idx = batch_cnt;
     431           0 :   }
     432             : 
     433           0 :   return FD_AIO_SUCCESS;
     434           0 : }
     435             : 
     436             : static void
     437             : privileged_init( fd_topo_t *      topo,
     438           0 :                  fd_topo_tile_t * tile ) {
     439           0 :   (void)topo; (void)tile;
     440             : 
     441             :   /* The fd_quic implementation calls fd_log_wallclock() internally
     442             :      which itself calls clock_gettime() which on most kernels is not a
     443             :      real syscall but a virtual one in the process via the vDSO.
     444             : 
     445             :      The first time this virtual call is made to the vDSO it does an
     446             :      mmap(2) of some shared memory into userspace, which cannot
     447             :      happen while sandboxed so we need to ensure that initialization
     448             :      happens here. */
     449             : 
     450           0 :   fd_log_wallclock();
     451           0 : }
     452             : 
     453             : static void
     454             : quic_tls_cv_sign( void *      signer_ctx,
     455             :                   uchar       signature[ static 64 ],
     456           0 :                   uchar const payload[ static 130 ] ) {
     457           0 :   fd_quic_ctx_t * ctx = signer_ctx;
     458           0 :   fd_sha512_t * sha512 = fd_sha512_join( ctx->sha512 );
     459           0 :   fd_ed25519_sign( signature, payload, 130UL, ctx->tls_pub_key, ctx->tls_priv_key, sha512 );
     460           0 :   fd_sha512_leave( sha512 );
     461           0 : }
     462             : 
     463             : static void
     464             : unprivileged_init( fd_topo_t *      topo,
     465           0 :                    fd_topo_tile_t * tile ) {
     466           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
     467           0 :   if( FD_UNLIKELY( topo->objs[ tile->tile_obj_id ].footprint < scratch_footprint( tile ) ) ) {
     468           0 :     FD_LOG_ERR(( "insufficient tile scratch space" ));
     469           0 :   }
     470             : 
     471           0 :   if( FD_UNLIKELY( tile->in_cnt==0 ) ) {
     472           0 :     FD_LOG_ERR(( "quic tile has no input links" ));
     473           0 :   }
     474           0 :   if( FD_UNLIKELY( tile->in_cnt > FD_QUIC_TILE_IN_MAX ) ) {
     475           0 :     FD_LOG_ERR(( "quic tile has too many input links (%lu), max %lu",
     476           0 :                  tile->in_cnt, FD_QUIC_TILE_IN_MAX ));
     477           0 :   }
     478             : 
     479           0 :   if( FD_UNLIKELY( tile->out_cnt!=2UL ||
     480           0 :                    strcmp( topo->links[ tile->out_link_id[ 0UL ] ].name, "quic_verify" ) ||
     481           0 :                    strcmp( topo->links[ tile->out_link_id[ 1UL ] ].name, "quic_net" ) ) )
     482           0 :     FD_LOG_ERR(( "quic tile has none or unexpected output links %lu %s %s",
     483           0 :                  tile->out_cnt, topo->links[ tile->out_link_id[ 0 ] ].name, topo->links[ tile->out_link_id[ 1 ] ].name ));
     484             : 
     485           0 :   ulong out_depth = topo->links[ tile->out_link_id[ 0 ] ].depth;
     486           0 :   if( FD_UNLIKELY( tile->quic.out_depth != out_depth ) )
     487           0 :     FD_LOG_ERR(( "tile->quic.out_depth (%u) does not match quic_verify link depth (%lu)",
     488           0 :                  tile->quic.out_depth, out_depth ));
     489             : 
     490           0 :   void * txn_dcache = topo->links[ tile->out_link_id[ 0UL ] ].dcache;
     491           0 :   if( FD_UNLIKELY( !txn_dcache ) ) FD_LOG_ERR(( "Missing output dcache" ));
     492             : 
     493           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     494           0 :   fd_quic_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_quic_ctx_t ), sizeof( fd_quic_ctx_t ) );
     495           0 :   fd_memset( ctx, 0, sizeof(fd_quic_ctx_t) );
     496             : 
     497           0 :   for( ulong i=0; i<tile->in_cnt; i++ ) {
     498           0 :     fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
     499           0 :     if( FD_UNLIKELY( 0!=strcmp( link->name, "net_quic" ) ) ) {
     500           0 :       FD_LOG_ERR(( "unexpected input link %s", link->name ));
     501           0 :     }
     502           0 :     fd_net_rx_bounds_init( &ctx->net_in_bounds[ i ], link->dcache );
     503           0 :   }
     504             : 
     505           0 :   if( FD_UNLIKELY( getrandom( ctx->tls_priv_key, ED25519_PRIV_KEY_SZ, 0 )!=ED25519_PRIV_KEY_SZ ) ) {
     506           0 :     FD_LOG_ERR(( "getrandom failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     507           0 :   }
     508           0 :   fd_sha512_t * sha512 = fd_sha512_join( fd_sha512_new( ctx->sha512 ) );
     509           0 :   fd_ed25519_public_from_private( ctx->tls_pub_key, ctx->tls_priv_key, sha512 );
     510           0 :   fd_sha512_leave( sha512 );
     511             : 
     512           0 :   fd_aio_t * quic_tx_aio = fd_aio_join( fd_aio_new( ctx->quic_tx_aio, ctx, quic_tx_aio_send ) );
     513           0 :   if( FD_UNLIKELY( !quic_tx_aio ) ) FD_LOG_ERR(( "fd_aio_join failed" ));
     514             : 
     515           0 :   fd_quic_limits_t limits = quic_limits( tile );
     516           0 :   fd_quic_t * quic = fd_quic_join( fd_quic_new( FD_SCRATCH_ALLOC_APPEND( l, fd_quic_align(), fd_quic_footprint( &limits ) ), &limits ) );
     517           0 :   if( FD_UNLIKELY( !quic ) ) FD_LOG_ERR(( "fd_quic_new failed" ));
     518             : 
     519           0 :   ulong  orig      = 0UL; /* fd_tango origin ID */
     520           0 :   ulong  reasm_max = tile->quic.reasm_cnt;
     521           0 :   void * reasm_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_tpu_reasm_align(), fd_tpu_reasm_footprint( out_depth, reasm_max ) );
     522           0 :   ctx->reasm       = fd_tpu_reasm_join( fd_tpu_reasm_new( reasm_mem, out_depth, reasm_max, orig, txn_dcache ) );
     523           0 :   if( FD_UNLIKELY( !ctx->reasm ) ) FD_LOG_ERR(( "fd_tpu_reasm_new failed" ));
     524             : 
     525           0 :   if( FD_UNLIKELY( tile->quic.ack_delay_millis == 0 ) ) {
     526           0 :     FD_LOG_ERR(( "Invalid `ack_delay_millis`: must be greater than zero" ));
     527           0 :   }
     528           0 :   if( FD_UNLIKELY( tile->quic.ack_delay_millis >= tile->quic.idle_timeout_millis ) ) {
     529           0 :     FD_LOG_ERR(( "Invalid `ack_delay_millis`: must be lower than `idle_timeout_millis`" ));
     530           0 :   }
     531             : 
     532           0 :   quic->config.role                       = FD_QUIC_ROLE_SERVER;
     533           0 :   quic->config.idle_timeout               = tile->quic.idle_timeout_millis * (ulong)1e6;
     534           0 :   quic->config.ack_delay                  = tile->quic.ack_delay_millis * (ulong)1e6;
     535           0 :   quic->config.initial_rx_max_stream_data = FD_TXN_MTU;
     536           0 :   quic->config.retry                      = tile->quic.retry;
     537           0 :   fd_memcpy( quic->config.identity_public_key, ctx->tls_pub_key, ED25519_PUB_KEY_SZ );
     538             : 
     539           0 :   quic->config.sign         = quic_tls_cv_sign;
     540           0 :   quic->config.sign_ctx     = ctx;
     541             : 
     542           0 :   quic->cb.conn_final       = quic_conn_final;
     543           0 :   quic->cb.stream_rx        = quic_stream_rx;
     544           0 :   quic->cb.now              = quic_now;
     545           0 :   quic->cb.now_ctx          = ctx;
     546           0 :   quic->cb.quic_ctx         = ctx;
     547             : 
     548           0 :   fd_quic_set_aio_net_tx( quic, quic_tx_aio );
     549           0 :   fd_quic_set_clock_tickcount( quic );
     550           0 :   if( FD_UNLIKELY( !fd_quic_init( quic ) ) ) FD_LOG_ERR(( "fd_quic_init failed" ));
     551             : 
     552           0 :   fd_topo_link_t * net_out = &topo->links[ tile->out_link_id[ 1 ] ];
     553             : 
     554           0 :   ctx->net_out_mcache = net_out->mcache;
     555           0 :   ctx->net_out_sync   = fd_mcache_seq_laddr( ctx->net_out_mcache );
     556           0 :   ctx->net_out_depth  = fd_mcache_depth( ctx->net_out_mcache );
     557           0 :   ctx->net_out_seq    = fd_mcache_seq_query( ctx->net_out_sync );
     558           0 :   ctx->net_out_mem    = topo->workspaces[ topo->objs[ net_out->dcache_obj_id ].wksp_id ].wksp;
     559           0 :   ctx->net_out_chunk0 = fd_dcache_compact_chunk0( ctx->net_out_mem, net_out->dcache );
     560           0 :   ctx->net_out_wmark  = fd_dcache_compact_wmark ( ctx->net_out_mem, net_out->dcache, net_out->mtu );
     561           0 :   ctx->net_out_chunk  = ctx->net_out_chunk0;
     562             : 
     563           0 :   fd_topo_link_t * verify_out = &topo->links[ tile->out_link_id[ 0 ] ];
     564             : 
     565           0 :   ctx->verify_out_mem = topo->workspaces[ topo->objs[ verify_out->dcache_obj_id ].wksp_id ].wksp;
     566             : 
     567           0 :   ctx->quic = quic;
     568             : 
     569           0 :   ctx->round_robin_cnt = fd_topo_tile_name_cnt( topo, tile->name );
     570           0 :   ctx->round_robin_id  = tile->kind_id;
     571           0 :   if( FD_UNLIKELY( ctx->round_robin_id >= ctx->round_robin_cnt ) ) {
     572           0 :     FD_LOG_ERR(( "invalid round robin configuration" ));
     573           0 :   }
     574             : 
     575           0 :   ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
     576           0 :   if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
     577           0 :     FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
     578             : 
     579           0 :   fd_histf_join( fd_histf_new( ctx->quic->metrics.service_duration, FD_MHIST_SECONDS_MIN( QUIC, SERVICE_DURATION_SECONDS ),
     580           0 :                                                                     FD_MHIST_SECONDS_MAX( QUIC, SERVICE_DURATION_SECONDS ) ) );
     581           0 :   fd_histf_join( fd_histf_new( ctx->quic->metrics.receive_duration, FD_MHIST_SECONDS_MIN( QUIC, RECEIVE_DURATION_SECONDS ),
     582           0 :                                                                     FD_MHIST_SECONDS_MAX( QUIC, RECEIVE_DURATION_SECONDS ) ) );
     583           0 : }
     584             : 
     585             : static ulong
     586             : populate_allowed_seccomp( fd_topo_t const *      topo,
     587             :                           fd_topo_tile_t const * tile,
     588             :                           ulong                  out_cnt,
     589           0 :                           struct sock_filter *   out ) {
                         /* Populates the seccomp filter for the quic tile.  out and out_cnt
                            are forwarded unchanged to populate_sock_filter_policy_quic
                            (presumably out_cnt is the capacity of out in instructions --
                            confirm against generated/quic_seccomp.h).  Returns
                            sock_filter_policy_quic_instr_cnt.  topo and tile are not
                            consulted. */
     590           0 :   (void)topo;
     591           0 :   (void)tile;
     592             : 
                         /* The log file descriptor is passed as the policy's only parameter;
                            it is cast to uint as-is, including the case where
                            fd_log_private_logfile_fd() reports -1. */
     593           0 :   populate_sock_filter_policy_quic( out_cnt, out, (uint)fd_log_private_logfile_fd() );
     594           0 :   return sock_filter_policy_quic_instr_cnt;
     595           0 : }
     596             : 
     597             : static ulong
     598             : populate_allowed_fds( fd_topo_t const *      topo,
     599             :                       fd_topo_tile_t const * tile,
     600             :                       ulong                  out_fds_cnt,
     601           0 :                       int *                  out_fds ) {
                         /* Writes the file descriptors this tile keeps into out_fds and
                            returns how many were written (1 or 2).  out_fds_cnt is the
                            caller-provided size of out_fds.  topo and tile are not
                            consulted. */
     602           0 :   (void)topo;
     603           0 :   (void)tile;
     604             : 
                         /* Up to two entries are written below, so anything smaller than two
                            slots is reported via FD_LOG_ERR before touching out_fds.
                            NOTE(review): this relies on FD_LOG_ERR not returning -- confirm. */
     605           0 :   if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
     606             : 
     607           0 :   ulong out_cnt = 0UL;
     608           0 :   out_fds[ out_cnt++ ] = 2; /* stderr */
                         /* The log file fd is only added when one exists (-1 means none). */
     609           0 :   if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
     610           0 :     out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
     611           0 :   return out_cnt;
     612           0 : }
     613             : 
     614           0 : #define STEM_BURST (1UL)
     615             : 
                       /* The STEM_* macros in this section are defined before fd_stem.c is
                          included below.  NOTE(review): presumably fd_stem.c is a template
                          that reads them to specialize the run loop -- confirm there.

                          Context type, and its alignment, named for the callbacks. */
     616           0 : #define STEM_CALLBACK_CONTEXT_TYPE  fd_quic_ctx_t
     617           0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_quic_ctx_t)
     618             : 
                       /* Each macro below names a function that must be defined elsewhere in
                          this file (none of them is visible in this part of the listing). */
     619           0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
     620           0 : #define STEM_CALLBACK_METRICS_WRITE       metrics_write
     621           0 : #define STEM_CALLBACK_BEFORE_CREDIT       before_credit
     622           0 : #define STEM_CALLBACK_BEFORE_FRAG         before_frag
     623           0 : #define STEM_CALLBACK_DURING_FRAG         during_frag
     624           0 : #define STEM_CALLBACK_AFTER_FRAG          after_frag
     625             : 
                       /* A .c file is included on purpose here.  NOTE(review): stem_run,
                          referenced by fd_tile_quic below, presumably comes from this
                          include -- confirm. */
     626             : #include "../stem/fd_stem.c"
     627             : 
     628             : fd_topo_run_tile_t fd_tile_quic = {
                         /* Descriptor for the tile named "quic".  It binds the sandbox hooks
                            (seccomp policy and allowed file descriptors), the scratch
                            alignment/footprint functions, the two init stages and the run
                            entry point (stem_run) defined in this file. */
     629             :   .name                     = "quic",
     630             :   .populate_allowed_seccomp = populate_allowed_seccomp,
     631             :   .populate_allowed_fds     = populate_allowed_fds,
     632             :   .scratch_align            = scratch_align,
     633             :   .scratch_footprint        = scratch_footprint,
     634             :   .privileged_init          = privileged_init,
     635             :   .unprivileged_init        = unprivileged_init,
     636             :   .run                      = stem_run,
     637             : };

Generated by: LCOV version 1.14