/* The xdp tile translates between AF_XDP and fd_tango
   traffic.  It is responsible for setting up the XDP and
   XSK socket configuration. */

#include <errno.h>
#include <fcntl.h>
#include <net/if.h>
#include <sys/socket.h> /* MSG_DONTWAIT needed before importing the net seccomp filter */
#include <linux/if_xdp.h>

#include "../../metrics/fd_metrics.h"
#include "../../netlink/fd_netlink_tile.h" /* neigh4_solicit */
#include "../../topo/fd_topo.h"

#include "../../../waltz/ip/fd_fib4.h"
#include "../../../waltz/neigh/fd_neigh4_map.h"
#include "../../../waltz/xdp/fd_xdp_redirect_user.h" /* fd_xsk_activate */
#include "../../../waltz/xdp/fd_xsk.h"
#include "../../../util/log/fd_dtrace.h"
#include "../../../util/net/fd_eth.h"
#include "../../../util/net/fd_ip4.h"

#include <unistd.h>
#include <linux/if.h> /* struct ifreq */
#include <sys/ioctl.h>
#include <linux/unistd.h>

#include "generated/xdp_seccomp.h"

/* MAX_NET_INS controls the max number of TX links that a net tile can
   serve. */

#define MAX_NET_INS (32UL)

/* FD_XDP_STATS_INTERVAL_NS controls the XDP stats refresh interval.
   This should be lower than the interval at which the metrics tile
   collects metrics. */

#define FD_XDP_STATS_INTERVAL_NS (11e6) /* 11ms */

/* fd_net_in_ctx_t contains consumer information for an incoming tango
   link.  It is used as part of the TX path. */

typedef struct {
  fd_wksp_t * mem;
  ulong       chunk0;
  ulong       wmark;
} fd_net_in_ctx_t;

/* fd_net_out_ctx_t contains publisher information for a link to a
   downstream app tile.  It is used as part of the RX path. */

typedef struct {
  fd_frag_meta_t * mcache;
  ulong *          sync;
  ulong            depth;
  ulong            seq;
} fd_net_out_ctx_t;
/* fd_net_flusher_t controls the pacing of XDP sendto calls for flushing
   TX batches.  In the 'wakeup' XDP mode, no TX occurs unless the net
   tile wakes up the kernel periodically using the sendto() syscall.
   If sendto() is called too frequently, time is wasted on context
   switches.  If sendto() is not called often enough, packets are
   delayed or dropped.  sendto() calls make almost no guarantees about
   how many packets are sent out, nor do they indicate when the kernel
   finishes a wakeup call (asynchronously dispatched).  The net tile
   thus uses a myriad of flush triggers that were tested for best
   performance. */

struct fd_net_flusher {

  /* Packets that were enqueued after the last sendto() wakeup are
     considered "pending".  If there are more than pending_wmark packets
     pending, a wakeup is dispatched.  Thus, this dispatch trigger is
     proportional to packet rate, but does not trigger if I/O is
     infrequent. */
  ulong pending_cnt;
  ulong pending_wmark;

  /* Sometimes, packets are not flushed out even after a sendto()
     wakeup.  This can result in the tail of a burst getting delayed or
     overrun.  If more than tail_flush_backoff ticks have passed since
     the last sendto() wakeup and there are still unacknowledged packets
     in the TX ring, another wakeup is issued. */
  long next_tail_flush_ticks;
  long tail_flush_backoff;

};

typedef struct fd_net_flusher fd_net_flusher_t;

FD_PROTOTYPES_BEGIN

/* fd_net_flusher_inc marks a new packet as enqueued. */

static inline void
fd_net_flusher_inc( fd_net_flusher_t * flusher,
                    long               now ) {
  flusher->pending_cnt++;
  long next_flush = now + flusher->tail_flush_backoff;
  flusher->next_tail_flush_ticks = fd_long_min( flusher->next_tail_flush_ticks, next_flush );
}

/* fd_net_flusher_check returns 1 if a sendto() wakeup should be issued
   immediately.  now is a recent fd_tickcount() value.
   If tx_ring_empty!=0 then the kernel has caught up with the net tile
   on the XDP TX ring.  (Otherwise, the kernel is behind the net tile) */

static inline int
fd_net_flusher_check( fd_net_flusher_t * flusher,
                      long               now,
                      int                tx_ring_empty ) {
  int flush_level   = flusher->pending_cnt >= flusher->pending_wmark;
  int flush_timeout = now >= flusher->next_tail_flush_ticks;
  int flush         = flush_level || flush_timeout;
  if( !flush ) return 0;
  if( FD_UNLIKELY( tx_ring_empty ) ) {
    /* Flush requested but caught up */
    flusher->pending_cnt           = 0UL;
    flusher->next_tail_flush_ticks = LONG_MAX;
    return 0;
  }
  return 1;
}

/* fd_net_flusher_wakeup signals a sendto() wakeup was done.  now is a
   recent fd_tickcount() value. */

static inline void
fd_net_flusher_wakeup( fd_net_flusher_t * flusher,
                       long               now ) {
  flusher->pending_cnt           = 0UL;
  flusher->next_tail_flush_ticks = now + flusher->tail_flush_backoff;
}

FD_PROTOTYPES_END
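
/* Usage sketch (illustrative only, not compiled into this tile): a
   service loop drives the flusher roughly as follows.  Names such as
   flusher, ctx, xsk, and charge_busy stand in for whatever the caller
   has in scope; net_tx_wakeup is defined further below.

     long now = fd_tickcount();
     int  tx_ring_empty = ...;  // prod==cons on the XDP TX ring
     if( fd_net_flusher_check( flusher, now, tx_ring_empty ) ) {
       net_tx_wakeup( ctx, xsk, &charge_busy );  // sendto() wakeup
       fd_net_flusher_wakeup( flusher, now );    // reset pacing state
     }

   and every packet submitted to the TX ring is accounted for with
   fd_net_flusher_inc( flusher, fd_tickcount() ). */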

/* fd_net_free_ring is a FIFO queue that stores pointers to free XDP TX
   frames. */

struct fd_net_free_ring {
  ulong   prod;
  ulong   cons;
  ulong   depth;
  ulong * queue;
};
typedef struct fd_net_free_ring fd_net_free_ring_t;
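
/* Illustrative sketch of the ring discipline (assuming depth is a power
   of two, as the masking in net_comp_event below requires):

     // pop a free frame (consumer side, used by before_frag)
     ulong frame = free->queue[ free->cons % free->depth ];
     free->cons  = fd_seq_inc( free->cons, 1UL );

     // push a frame back (producer side, used by net_comp_event)
     free->queue[ free->prod % free->depth ] = frame;
     free->prod  = fd_seq_inc( free->prod, 1UL );

   The ring is empty when prod==cons and full when prod-cons==depth. */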

typedef struct {
  /* An "XSK" is an AF_XDP socket */
  uint     xsk_cnt;
  fd_xsk_t xsk[ 2 ];
  int      prog_link_fds[ 2 ];

  /* UMEM frame region within dcache */
  void *   umem_frame0; /* First UMEM frame */
  ulong    umem_sz;     /* Usable UMEM size starting at frame0 */

  /* UMEM chunk region within workspace */
  uint     umem_chunk0; /* Lowest allowed chunk number */
  uint     umem_wmark;  /* Highest allowed chunk number */

  /* All net tiles are subscribed to the same TX links.  (These are
     incoming links from app tiles asking the net tile to send out
     packets)  The net tiles "take turns" doing TX jobs based on the
     L3+L4 dst hash.  net_tile_id is the index of the current net tile,
     net_tile_cnt is the total number of net tiles. */
  uint net_tile_id;
  uint net_tile_cnt;

  /* Details pertaining to an inflight send op */
  struct {
    uint   if_idx; /* 0: main interface, 1: loopback */
    void * frame;
    uchar  mac_addrs[12]; /* First 12 bytes of Ethernet header */
    uint   src_ip;
  } tx_op;

  /* Round-robin cycle of service operations */
  uint rr_idx;

  /* Ring tracking free packet buffers */
  fd_net_free_ring_t free_tx;

  uchar  src_mac_addr[6];

  ushort shred_listen_port;
  ushort quic_transaction_listen_port;
  ushort legacy_transaction_listen_port;
  ushort gossip_listen_port;
  ushort repair_intake_listen_port;
  ushort repair_serve_listen_port;

  ulong in_cnt;
  fd_net_in_ctx_t in[ MAX_NET_INS ];

  fd_net_out_ctx_t quic_out[1];
  fd_net_out_ctx_t shred_out[1];
  fd_net_out_ctx_t gossip_out[1];
  fd_net_out_ctx_t repair_out[1];

  /* XDP stats refresh timer */
  long xdp_stats_interval_ticks;
  long next_xdp_stats_refresh;

  /* TX flush timers */
  fd_net_flusher_t tx_flusher[2]; /* one per XSK */

  /* Route and neighbor tables */
  fd_fib4_t const * fib_local;
  fd_fib4_t const * fib_main;
  fd_neigh4_hmap_t  neigh4[1];
  fd_netlink_neigh4_solicit_link_t neigh4_solicit[1];

  struct {
    ulong rx_pkt_cnt;
    ulong rx_bytes_total;
    ulong rx_undersz_cnt;
    ulong rx_fill_blocked_cnt;
    ulong rx_backp_cnt;
    long  rx_busy_cnt;
    long  rx_idle_cnt;

    ulong tx_submit_cnt;
    ulong tx_complete_cnt;
    ulong tx_bytes_total;
    ulong tx_route_fail_cnt;
    ulong tx_no_xdp_cnt;
    ulong tx_neigh_fail_cnt;
    ulong tx_full_fail_cnt;
    long  tx_busy_cnt;
    long  tx_idle_cnt;

    ulong xsk_tx_wakeup_cnt;
    ulong xsk_rx_wakeup_cnt;
  } metrics;
} fd_net_ctx_t;

FD_FN_CONST static inline ulong
scratch_align( void ) {
  return 4096UL;
}

FD_FN_PURE static inline ulong
scratch_footprint( fd_topo_tile_t const * tile ) {
  ulong l = FD_LAYOUT_INIT;
  l = FD_LAYOUT_APPEND( l, alignof(fd_net_ctx_t), sizeof(fd_net_ctx_t)                      );
  l = FD_LAYOUT_APPEND( l, alignof(ulong),        tile->net.free_ring_depth * sizeof(ulong) );
  return FD_LAYOUT_FINI( l, scratch_align() );
}
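
/* Worked example (assuming free_ring_depth==2048; the actual value
   comes from the topology config): the layout places fd_net_ctx_t
   first, then pads to alignof(ulong) and appends 2048*8 bytes for the
   free ring queue, and FD_LAYOUT_FINI rounds the total up to the 4096
   byte scratch alignment. */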

static void
metrics_write( fd_net_ctx_t * ctx ) {
  FD_MCNT_SET(   NET, RX_PKT_CNT,          ctx->metrics.rx_pkt_cnt          );
  FD_MCNT_SET(   NET, RX_BYTES_TOTAL,      ctx->metrics.rx_bytes_total      );
  FD_MCNT_SET(   NET, RX_UNDERSZ_CNT,      ctx->metrics.rx_undersz_cnt      );
  FD_MCNT_SET(   NET, RX_FILL_BLOCKED_CNT, ctx->metrics.rx_fill_blocked_cnt );
  FD_MCNT_SET(   NET, RX_BACKPRESSURE_CNT, ctx->metrics.rx_backp_cnt        );
  FD_MGAUGE_SET( NET, RX_BUSY_CNT, (ulong)fd_long_max( ctx->metrics.rx_busy_cnt, 0L ) );
  FD_MGAUGE_SET( NET, RX_IDLE_CNT, (ulong)fd_long_max( ctx->metrics.rx_idle_cnt, 0L ) );
  FD_MGAUGE_SET( NET, TX_BUSY_CNT, (ulong)fd_long_max( ctx->metrics.tx_busy_cnt, 0L ) );
  FD_MGAUGE_SET( NET, TX_IDLE_CNT, (ulong)fd_long_max( ctx->metrics.tx_idle_cnt, 0L ) );

  FD_MCNT_SET( NET, TX_SUBMIT_CNT,        ctx->metrics.tx_submit_cnt     );
  FD_MCNT_SET( NET, TX_COMPLETE_CNT,      ctx->metrics.tx_complete_cnt   );
  FD_MCNT_SET( NET, TX_BYTES_TOTAL,       ctx->metrics.tx_bytes_total    );
  FD_MCNT_SET( NET, TX_ROUTE_FAIL_CNT,    ctx->metrics.tx_route_fail_cnt );
  FD_MCNT_SET( NET, TX_NEIGHBOR_FAIL_CNT, ctx->metrics.tx_neigh_fail_cnt );
  FD_MCNT_SET( NET, TX_FULL_FAIL_CNT,     ctx->metrics.tx_full_fail_cnt  );

  FD_MCNT_SET( NET, XSK_TX_WAKEUP_CNT,    ctx->metrics.xsk_tx_wakeup_cnt    );
  FD_MCNT_SET( NET, XSK_RX_WAKEUP_CNT,    ctx->metrics.xsk_rx_wakeup_cnt    );
}

struct xdp_statistics_v0 {
  __u64 rx_dropped; /* Dropped for other reasons */
  __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
  __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
};

struct xdp_statistics_v1 {
  __u64 rx_dropped; /* Dropped for other reasons */
  __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
  __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
  __u64 rx_ring_full; /* Dropped due to rx ring being full */
  __u64 rx_fill_ring_empty_descs; /* Failed to retrieve item from fill ring */
  __u64 tx_ring_empty_descs; /* Failed to retrieve item from tx ring */
};
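
/* Kernels that predate the v1 statistics extension fill in only the
   shorter v0 layout and report that via the optlen out-parameter of
   getsockopt().  poll_xdp_statistics below therefore accepts either
   size; sub_stats is zero-initialized so the missing v1 fields simply
   contribute zero on older kernels. */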

static void
poll_xdp_statistics( fd_net_ctx_t * ctx ) {
  struct xdp_statistics_v1 stats = {0};
  ulong xsk_cnt = ctx->xsk_cnt;
  for( ulong j=0UL; j<xsk_cnt; j++ ) {
    struct xdp_statistics_v1 sub_stats = {0};
    uint optlen = (uint)sizeof(struct xdp_statistics_v1);
    if( FD_UNLIKELY( -1==getsockopt( ctx->xsk[ j ].xsk_fd, SOL_XDP, XDP_STATISTICS, &sub_stats, &optlen ) ) )
      FD_LOG_ERR(( "getsockopt(SOL_XDP, XDP_STATISTICS) failed: %s", strerror( errno ) ));
    if( FD_UNLIKELY( optlen!=sizeof(struct xdp_statistics_v0) &&
                     optlen!=sizeof(struct xdp_statistics_v1) ) ) {
      FD_LOG_ERR(( "getsockopt(SOL_XDP, XDP_STATISTICS) returned unexpected size %u", optlen ));
    }
    stats.rx_dropped               += sub_stats.rx_dropped;
    stats.rx_invalid_descs         += sub_stats.rx_invalid_descs;
    stats.tx_invalid_descs         += sub_stats.tx_invalid_descs;
    stats.rx_ring_full             += sub_stats.rx_ring_full;
    stats.rx_fill_ring_empty_descs += sub_stats.rx_fill_ring_empty_descs;
    stats.tx_ring_empty_descs      += sub_stats.tx_ring_empty_descs;
  }

  FD_MCNT_SET( NET, XDP_RX_DROPPED_OTHER,         stats.rx_dropped               );
  FD_MCNT_SET( NET, XDP_RX_INVALID_DESCS,         stats.rx_invalid_descs         );
  FD_MCNT_SET( NET, XDP_TX_INVALID_DESCS,         stats.tx_invalid_descs         );
  FD_MCNT_SET( NET, XDP_RX_RING_FULL,             stats.rx_ring_full             );
  FD_MCNT_SET( NET, XDP_RX_FILL_RING_EMPTY_DESCS, stats.rx_fill_ring_empty_descs );
  FD_MCNT_SET( NET, XDP_TX_RING_EMPTY_DESCS,      stats.tx_ring_empty_descs      );
}

/* net_is_fatal_xdp_error returns 1 if the given errno returned by an
   XDP API indicates a non-recoverable error code.  The net tile should
   crash if it sees such an error so the problem does not go undetected.
   Otherwise, returns 0. */

static int
net_is_fatal_xdp_error( int err ) {
  return err==ESOCKTNOSUPPORT || err==EOPNOTSUPP || err==EINVAL ||
         err==EPERM;
}

/* net_tx_ready returns 1 if the current XSK is ready to submit a TX send
   job.  If the XSK is blocked for sends, returns 0.  Reasons for block
   include:
   - No XSK TX buffer is available
   - XSK TX ring is full */

static int
net_tx_ready( fd_net_ctx_t * ctx,
              uint           if_idx ) {
  fd_xsk_t *           xsk     = &ctx->xsk[ if_idx ];
  fd_xdp_ring_t *      tx_ring = &xsk->ring_tx;
  fd_net_free_ring_t * free    = &ctx->free_tx;
  if( free->prod == free->cons ) return 0; /* drop */
  if( tx_ring->prod - tx_ring->cons >= tx_ring->depth ) return 0; /* drop */
  return 1;
}

/* net_rx_wakeup triggers xsk_recvmsg to run in the kernel.  Needs to be
   called periodically in order to receive packets. */

static void
net_rx_wakeup( fd_net_ctx_t * ctx,
               fd_xsk_t *     xsk,
               int *          charge_busy ) {
  if( !fd_xsk_rx_need_wakeup( xsk ) ) return;
  *charge_busy = 1;
  struct msghdr _ignored[ 1 ] = { 0 };
  if( FD_UNLIKELY( -1==recvmsg( xsk->xsk_fd, _ignored, MSG_DONTWAIT ) ) ) {
    if( FD_UNLIKELY( net_is_fatal_xdp_error( errno ) ) ) {
      FD_LOG_ERR(( "xsk recvmsg failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) ));
    }
    if( FD_UNLIKELY( errno!=EAGAIN ) ) {
      long ts = fd_log_wallclock();
      if( ts > xsk->log_suppress_until_ns ) {
        FD_LOG_WARNING(( "xsk recvmsg failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) ));
        xsk->log_suppress_until_ns = ts + (long)1e9;
      }
    }
  }
  ctx->metrics.xsk_rx_wakeup_cnt++;
}

/* net_tx_wakeup triggers xsk_sendmsg to run in the kernel.  Needs to be
   called periodically in order to transmit packets. */

static void
net_tx_wakeup( fd_net_ctx_t * ctx,
               fd_xsk_t *     xsk,
               int *          charge_busy ) {
  if( !fd_xsk_tx_need_wakeup( xsk ) ) return;
  if( FD_VOLATILE_CONST( *xsk->ring_tx.prod )==FD_VOLATILE_CONST( *xsk->ring_tx.cons ) ) return;
  *charge_busy = 1;
  if( FD_UNLIKELY( -1==sendto( xsk->xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0 ) ) ) {
    if( FD_UNLIKELY( net_is_fatal_xdp_error( errno ) ) ) {
      FD_LOG_ERR(( "xsk sendto failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) ));
    }
    if( FD_UNLIKELY( errno!=EAGAIN ) ) {
      long ts = fd_log_wallclock();
      if( ts > xsk->log_suppress_until_ns ) {
        FD_LOG_WARNING(( "xsk sendto failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) ));
        xsk->log_suppress_until_ns = ts + (long)1e9;
      }
    }
  }
  ctx->metrics.xsk_tx_wakeup_cnt++;
}

/* net_tx_periodic_wakeup does a timer based xsk_sendmsg wakeup. */

static inline int
net_tx_periodic_wakeup( fd_net_ctx_t * ctx,
                        uint           if_idx,
                        long           now,
                        int *          charge_busy ) {
  uint tx_prod = FD_VOLATILE_CONST( *ctx->xsk[ if_idx ].ring_tx.prod );
  uint tx_cons = FD_VOLATILE_CONST( *ctx->xsk[ if_idx ].ring_tx.cons );
  int tx_ring_empty = tx_prod==tx_cons;
  if( fd_net_flusher_check( ctx->tx_flusher+if_idx, now, tx_ring_empty ) ) {
    net_tx_wakeup( ctx, &ctx->xsk[ if_idx ], charge_busy );
    fd_net_flusher_wakeup( ctx->tx_flusher+if_idx, now );
  }
  return 0;
}

static void
during_housekeeping( fd_net_ctx_t * ctx ) {
  long now = fd_tickcount();

  ctx->metrics.rx_busy_cnt = 0UL;
  ctx->metrics.rx_idle_cnt = 0UL;
  ctx->metrics.tx_busy_cnt = 0UL;
  ctx->metrics.tx_idle_cnt = fd_seq_diff( ctx->free_tx.prod, ctx->free_tx.cons );
  for( uint j=0U; j<ctx->xsk_cnt; j++ ) {
    fd_xsk_t * xsk = &ctx->xsk[ j ];
    /* Refresh all sequence numbers (consumer first, then producer) */
    FD_COMPILER_MFENCE();
    xsk->ring_fr.cached_cons = FD_VOLATILE_CONST( *xsk->ring_fr.cons );
    xsk->ring_fr.cached_prod = FD_VOLATILE_CONST( *xsk->ring_fr.prod );
    xsk->ring_rx.cached_cons = FD_VOLATILE_CONST( *xsk->ring_rx.cons );
    xsk->ring_rx.cached_prod = FD_VOLATILE_CONST( *xsk->ring_rx.prod );
    xsk->ring_tx.cached_cons = FD_VOLATILE_CONST( *xsk->ring_tx.cons );
    xsk->ring_tx.cached_prod = FD_VOLATILE_CONST( *xsk->ring_tx.prod );
    xsk->ring_cr.cached_cons = FD_VOLATILE_CONST( *xsk->ring_cr.cons );
    xsk->ring_cr.cached_prod = FD_VOLATILE_CONST( *xsk->ring_cr.prod );
    FD_COMPILER_MFENCE();
    ctx->metrics.rx_busy_cnt += (long)(int)( xsk->ring_rx.cached_prod - xsk->ring_rx.cached_cons );
    ctx->metrics.rx_idle_cnt += (long)(int)( xsk->ring_fr.cached_prod - xsk->ring_fr.cached_cons );
    ctx->metrics.tx_busy_cnt += (long)(int)( xsk->ring_tx.cached_prod - xsk->ring_tx.cached_cons );
    ctx->metrics.tx_busy_cnt += (long)(int)( xsk->ring_cr.cached_prod - xsk->ring_cr.cached_cons );
  }

  if( now > ctx->next_xdp_stats_refresh ) {
    ctx->next_xdp_stats_refresh = now + ctx->xdp_stats_interval_ticks;
    poll_xdp_statistics( ctx );
  }

  int _charge_busy = 0;
  for( uint j=0U; j<ctx->xsk_cnt; j++ ) {
    net_rx_wakeup( ctx, &ctx->xsk[ j ], &_charge_busy );
  }
}

/* net_tx_route resolves the destination interface index, src MAC address,
   and dst MAC address.  Returns 1 on success, 0 on failure.  On success,
   tx_op->{if_idx,mac_addrs} is set. */

static int
net_tx_route( fd_net_ctx_t * ctx,
              uint           dst_ip ) {

  /* Route lookup */

  fd_fib4_hop_t hop[2] = {0};
  fd_fib4_lookup( ctx->fib_local, hop+0, dst_ip, 0UL );
  fd_fib4_lookup( ctx->fib_main,  hop+1, dst_ip, 0UL );
  fd_fib4_hop_t const * next_hop = fd_fib4_hop_or( hop+0, hop+1 );

  uint rtype   = next_hop->rtype;
  uint if_idx  = next_hop->if_idx;
  uint ip4_src = next_hop->ip4_src;

  if( FD_UNLIKELY( rtype==FD_FIB4_RTYPE_LOCAL ) ) {
    rtype  = FD_FIB4_RTYPE_UNICAST;
    if_idx = 1;
  }

  if( FD_UNLIKELY( rtype!=FD_FIB4_RTYPE_UNICAST ) ) {
    ctx->metrics.tx_route_fail_cnt++;
    return 0;
  }

  if( if_idx==1 ) {
    /* Set Ethernet src and dst address to 00:00:00:00:00:00 */
    memset( ctx->tx_op.mac_addrs, 0, 12UL );
    ctx->tx_op.if_idx = 1;
    return 1;
  }

  if( FD_UNLIKELY( if_idx!=ctx->xsk[ 0 ].if_idx ) ) {
    ctx->metrics.tx_no_xdp_cnt++;
    return 0;
  }
  ctx->tx_op.if_idx = 0;

  /* Neighbor resolve */

  uint neigh_ip = next_hop->ip4_gw;
  if( !neigh_ip ) neigh_ip = dst_ip;

  fd_neigh4_hmap_query_t neigh_query[1];
  int neigh_res = fd_neigh4_hmap_query_try( ctx->neigh4, &neigh_ip, NULL, neigh_query, 0 );
  if( FD_UNLIKELY( neigh_res!=FD_MAP_SUCCESS ) ) {
    /* Neighbor not found */
    fd_netlink_neigh4_solicit( ctx->neigh4_solicit, neigh_ip, if_idx, fd_frag_meta_ts_comp( fd_tickcount() ) );
    ctx->metrics.tx_neigh_fail_cnt++;
    return 0;
  }
  fd_neigh4_entry_t const * neigh = fd_neigh4_hmap_query_ele_const( neigh_query );
  if( FD_UNLIKELY( neigh->state != FD_NEIGH4_STATE_ACTIVE ) ) {
    ctx->metrics.tx_neigh_fail_cnt++;
    return 0;
  }

  ctx->tx_op.src_ip = ip4_src;
  memcpy( ctx->tx_op.mac_addrs+0, neigh->mac_addr,   6 );
  memcpy( ctx->tx_op.mac_addrs+6, ctx->src_mac_addr, 6 );

  if( FD_UNLIKELY( fd_neigh4_hmap_query_test( neigh_query ) ) ) {
    ctx->metrics.tx_neigh_fail_cnt++;
    return 0;
  }

  return 1;
}
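
/* Worked example of the resolution above: a dst_ip owned by the host
   matches a LOCAL route, which is rewritten to a unicast route out of
   if_idx 1 (loopback) with zeroed MAC addresses.  A dst_ip behind a
   gateway instead resolves the gateway IP in the neighbor table, and
   mac_addrs is filled with the neighbor's MAC (dst) followed by this
   interface's MAC (src). */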

/* before_frag is called when a new metadata descriptor for a TX job is
   found.  This callback determines whether this net tile is responsible
   for the TX job.  If so, it prepares the TX op for the during_frag and
   after_frag callbacks. */

static inline int
before_frag( fd_net_ctx_t * ctx,
             ulong          in_idx,
             ulong          seq,
             ulong          sig ) {
  (void)in_idx; (void)seq;

  /* Find interface index of next packet */

  ulong proto = fd_disco_netmux_sig_proto( sig );
  if( FD_UNLIKELY( proto!=DST_PROTO_OUTGOING ) ) return 1;

  uint dst_ip = fd_disco_netmux_sig_dst_ip( sig );
  if( FD_UNLIKELY( !net_tx_route( ctx, dst_ip ) ) ) return 1;

  uint net_tile_id  = ctx->net_tile_id;
  uint net_tile_cnt = ctx->net_tile_cnt;
  uint if_idx       = ctx->tx_op.if_idx;
  if( FD_UNLIKELY( if_idx>=ctx->xsk_cnt ) ) return 1; /* ignore */

  /* Load balance TX */

  uint hash       = (uint)fd_disco_netmux_sig_hash( sig );
  uint target_idx = hash % net_tile_cnt;
  if( if_idx==1 ) target_idx = 0; /* loopback always targets tile 0 */

  /* Skip if another net tile is responsible for this packet */

  if( net_tile_id!=target_idx ) return 1; /* ignore */

  /* Skip if TX is blocked */

  if( FD_UNLIKELY( !net_tx_ready( ctx, if_idx ) ) ) {
    ctx->metrics.tx_full_fail_cnt++;
    return 1;
  }

  /* Allocate a TX frame from the free ring */

  fd_net_free_ring_t * free      = &ctx->free_tx;
  ulong                alloc_seq = free->cons;
  void *               frame     = (void *)free->queue[ alloc_seq % free->depth ];
  free->cons = fd_seq_inc( alloc_seq, 1UL );

  ctx->tx_op.if_idx    = if_idx;
  ctx->tx_op.frame     = frame;

  return 0; /* continue */
}
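
/* Illustrative example of the load balancing above: with
   net_tile_cnt==2, a frag whose sig hash is 7 targets net tile
   7%2==1, so net tile 0 returns 1 (skip) while net tile 1 claims the
   TX job.  Loopback traffic is always pinned to net tile 0 regardless
   of hash. */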

/* during_frag is called when before_frag has committed to transmit an
   outgoing packet. */

static inline void
during_frag( fd_net_ctx_t * ctx,
             ulong          in_idx,
             ulong          seq FD_PARAM_UNUSED,
             ulong          sig FD_PARAM_UNUSED,
             ulong          chunk,
             ulong          sz,
             ulong          ctl FD_PARAM_UNUSED ) {
  if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>FD_NET_MTU ) )
    FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));

  if( FD_UNLIKELY( sz<34UL ) )
    FD_LOG_ERR(( "packet too small %lu (in_idx=%lu)", sz, in_idx ));

  void * frame = ctx->tx_op.frame;
  if( FD_UNLIKELY( (ulong)frame < (ulong)ctx->umem_frame0 ) )
    FD_LOG_ERR(( "frame %p out of bounds (below %p)", frame, (void *)ctx->umem_frame0 ));
  ulong umem_off = (ulong)frame - (ulong)ctx->umem_frame0;
  if( FD_UNLIKELY( (ulong)umem_off > (ulong)ctx->umem_sz ) )
    FD_LOG_ERR(( "frame %p out of bounds (beyond %p)", frame, (void *)ctx->umem_sz ));

  /* Speculatively copy frame into XDP buffer */
  uchar const * src = fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk );
  fd_memcpy( ctx->tx_op.frame, src, sz );
}

/* after_frag is called when the during_frag memcpy was _not_ overrun. */

static void
after_frag( fd_net_ctx_t *      ctx,
            ulong               in_idx,
            ulong               seq,
            ulong               sig,
            ulong               sz,
            ulong               tsorig,
            ulong               tspub,
            fd_stem_context_t * stem ) {
  (void)in_idx; (void)seq; (void)sig; (void)tsorig; (void)tspub; (void)stem;

  /* Current send operation */

  uint       if_idx = ctx->tx_op.if_idx;
  uchar *    frame  = ctx->tx_op.frame;
  fd_xsk_t * xsk    = &ctx->xsk[ if_idx ];

  /* Select Ethernet addresses */
  memcpy( frame, ctx->tx_op.mac_addrs, 12 );

  /* Select IPv4 source address */
  uint   ihl       = frame[ 14 ] & 0x0f;
  ushort ethertype = FD_LOAD( ushort, frame+12 );
  uint   ip4_saddr = FD_LOAD( uint,   frame+26 );
  if( ethertype==fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) && ip4_saddr==0 ) {
    if( FD_UNLIKELY( ctx->tx_op.src_ip==0 ||
                     ihl<5 || (14+(ihl<<2))>sz ) ) {
      /* Outgoing IPv4 packet with unknown src IP or invalid IHL */
      /* FIXME should select first IPv4 address of device table here */
      ctx->metrics.tx_route_fail_cnt++;
      return;
    }

    /* Recompute checksum after changing header */
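    /* Note: the IPv4 header checksum covers its own field, so the
       checksum field is zeroed before fd_ip4_hdr_check sums the
       updated header. */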
    FD_STORE( uint,   frame+26, ctx->tx_op.src_ip );
    FD_STORE( ushort, frame+24, 0 );
    FD_STORE( ushort, frame+24, fd_ip4_hdr_check( frame+14 ) );
  }

  /* Submit packet TX job

     Invariant for ring_tx: prod-cons<depth
     (This invariant breaks if any other packet is sent over this ring
     between before_frag and this point, e.g. send_arp_probe.) */

  fd_xdp_ring_t * tx_ring = &xsk->ring_tx;
  uint            tx_seq  = FD_VOLATILE_CONST( *tx_ring->prod );
  uint            tx_mask = tx_ring->depth - 1U;
  xsk->ring_tx.packet_ring[ tx_seq&tx_mask ] = (struct xdp_desc) {
    .addr    = (ulong)frame - (ulong)ctx->umem_frame0,
    .len     = (uint)sz,
    .options = 0
  };

  /* Frame is now owned by kernel. Clear tx_op. */
  ctx->tx_op.frame = NULL;

  /* Register newly enqueued packet */
  FD_VOLATILE( *xsk->ring_tx.prod ) = tx_ring->cached_prod = tx_seq+1U;
  ctx->metrics.tx_submit_cnt++;
  ctx->metrics.tx_bytes_total += sz;
  fd_net_flusher_inc( ctx->tx_flusher+if_idx, fd_tickcount() );

}

/* net_rx_packet is called when a new Ethernet frame is available.
   Attempts to copy out the frame to a downstream tile. */

static void
net_rx_packet( fd_net_ctx_t *      ctx,
               fd_stem_context_t * stem,
               ulong               umem_off,
               ulong               sz,
               uint *              freed_chunk ) {

  ulong umem_lowbits = umem_off & 0x3fUL;

  uchar const * packet     = (uchar const *)ctx->umem_frame0 + umem_off;
  uchar const * packet_end = packet + sz;
  uchar const * iphdr      = packet + 14U;

  /* Translate packet to UMEM frame index */
  ulong chunk = ctx->umem_chunk0 + (umem_off>>FD_CHUNK_LG_SZ);

  /* Filter for UDP/IPv4 packets. Test for ethtype and ipproto in 1
     branch */
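  /* (packet[12]<<16)|(packet[13]<<8)|packet[23] packs the Ethernet
     ethertype with the IPv4 protocol byte: 0x080011 is ethertype
     0x0800 (IPv4) combined with IP protocol 0x11 (UDP). */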
  uint test_ethip = ( (uint)packet[12] << 16u ) | ( (uint)packet[13] << 8u ) | (uint)packet[23];
  if( FD_UNLIKELY( test_ethip!=0x080011 ) ) {
    FD_LOG_ERR(( "Firedancer received a packet from the XDP program that was either "
                 "not an IPv4 packet, or not a UDP packet. It is likely your XDP program "
                 "is not configured correctly." ));
  }

  /* IPv4 is variable-length, so lookup IHL to find start of UDP */
  uint iplen = ( ( (uint)iphdr[0] ) & 0x0FU ) * 4U;
  uchar const * udp = iphdr + iplen;

  /* Ignore if UDP header is too short */
  if( FD_UNLIKELY( udp+8U > packet_end ) ) {
    FD_DTRACE_PROBE( net_tile_err_rx_undersz );
    ctx->metrics.rx_undersz_cnt++;
    return;
  }

  /* Extract IP dest addr and UDP src/dest port */
  uint ip_srcaddr    =                  *(uint   *)( iphdr+12UL );
  ushort udp_srcport = fd_ushort_bswap( *(ushort *)( udp+0UL    ) );
  ushort udp_dstport = fd_ushort_bswap( *(ushort *)( udp+2UL    ) );

  FD_DTRACE_PROBE_4( net_tile_pkt_rx, ip_srcaddr, udp_srcport, udp_dstport, sz );

  /* Route packet to downstream tile */
  ushort proto;
  fd_net_out_ctx_t * out;
  if(      FD_UNLIKELY( udp_dstport==ctx->shred_listen_port ) ) {
    proto = DST_PROTO_SHRED;
    out = ctx->shred_out;
  } else if( FD_UNLIKELY( udp_dstport==ctx->quic_transaction_listen_port ) ) {
    proto = DST_PROTO_TPU_QUIC;
    out = ctx->quic_out;
  } else if( FD_UNLIKELY( udp_dstport==ctx->legacy_transaction_listen_port ) ) {
    proto = DST_PROTO_TPU_UDP;
    out = ctx->quic_out;
  } else if( FD_UNLIKELY( udp_dstport==ctx->gossip_listen_port ) ) {
    proto = DST_PROTO_GOSSIP;
    out = ctx->gossip_out;
  } else if( FD_UNLIKELY( udp_dstport==ctx->repair_intake_listen_port ) ) {
    proto = DST_PROTO_REPAIR;
    out = ctx->repair_out;
  } else if( FD_UNLIKELY( udp_dstport==ctx->repair_serve_listen_port ) ) {
    proto = DST_PROTO_REPAIR;
    out = ctx->repair_out;
  } else {

    FD_LOG_ERR(( "Firedancer received a UDP packet on port %hu which was not expected. "
                 "Only the following ports should be configured to forward packets: "
                 "%hu, %hu, %hu, %hu, %hu, %hu (excluding any 0 ports, which can be ignored). "
                 "Please report this error to Firedancer maintainers.",
                 udp_dstport,
                 ctx->shred_listen_port,
                 ctx->quic_transaction_listen_port,
                 ctx->legacy_transaction_listen_port,
                 ctx->gossip_listen_port,
                 ctx->repair_intake_listen_port,
                 ctx->repair_serve_listen_port ));
  }

  /* tile can decide how to partition based on src ip addr and src port */
  ulong sig = fd_disco_netmux_sig( ip_srcaddr, udp_srcport, 0U, proto, 14UL+8UL+iplen );

  /* Peek the mline for an old frame */
  fd_frag_meta_t * mline = out->mcache + fd_mcache_line_idx( out->seq, out->depth );
  *freed_chunk = mline->chunk;

  /* Overwrite the mline with the new frame */
  ulong tspub = (ulong)fd_frag_meta_ts_comp( fd_tickcount() );
  fd_mcache_publish( out->mcache, out->depth, out->seq, sig, chunk, sz, umem_lowbits, 0, tspub );

  /* Wind up for the next iteration */
  *stem->cr_avail -= stem->cr_decrement_amount;
  out->seq = fd_seq_inc( out->seq, 1UL );

  ctx->metrics.rx_pkt_cnt++;
  ctx->metrics.rx_bytes_total += sz;

}

/* net_comp_event is called when an XDP TX frame is free again. */

static void
net_comp_event( fd_net_ctx_t * ctx,
                fd_xsk_t *     xsk,
                uint           comp_seq ) {

  /* Locate the incoming frame */

  fd_xdp_ring_t * comp_ring  = &xsk->ring_cr;
  uint            comp_mask  = comp_ring->depth - 1U;
  ulong           frame      = FD_VOLATILE_CONST( comp_ring->frame_ring[ comp_seq&comp_mask ] );
  ulong const     frame_mask = FD_NET_MTU - 1UL;
  if( FD_UNLIKELY( frame+FD_NET_MTU > ctx->umem_sz ) ) {
    FD_LOG_ERR(( "Bounds check failed: frame=0x%lx umem_sz=0x%lx",
                 frame, (ulong)ctx->umem_sz ));
  }

  /* Check if we have space to return the freed frame */

  fd_net_free_ring_t * free      = &ctx->free_tx;
  ulong                free_prod = free->prod;
  ulong                free_mask = free->depth - 1UL;
  long free_cnt = fd_seq_diff( free_prod, free->cons );
  if( FD_UNLIKELY( free_cnt>=(long)free->depth ) ) return; /* blocked */

  free->queue[ free_prod&free_mask ] = (ulong)ctx->umem_frame0 + (frame & (~frame_mask));
  free->prod = fd_seq_inc( free_prod, 1UL );

  /* Wind up for next iteration */

  FD_VOLATILE( *comp_ring->cons ) = comp_ring->cached_cons = comp_seq+1U;

  ctx->metrics.tx_complete_cnt++;

}

/* net_rx_event is called when a new XDP RX frame is available.  Calls
   net_rx_packet, then returns the packet back to the kernel via the fill
   ring.  */

static void
net_rx_event( fd_net_ctx_t *      ctx,
              fd_stem_context_t * stem,
              fd_xsk_t *          xsk,
              uint                rx_seq ) {

  // FIXME(topointon): Temporarily disabling backpressure feature because it triggers even with FD_TOPOB_UNRELIABLE
  //if( FD_UNLIKELY( *stem->cr_avail < stem->cr_decrement_amount ) ) {
  //  ctx->metrics.rx_backp_cnt++;
  //  return;
  //}

  /* Locate the incoming frame */

  fd_xdp_ring_t * rx_ring = &xsk->ring_rx;
  uint            rx_mask = rx_ring->depth - 1U;
  struct xdp_desc frame   = FD_VOLATILE_CONST( rx_ring->packet_ring[ rx_seq&rx_mask ] );

  if( FD_UNLIKELY( frame.len>FD_NET_MTU ) )
    FD_LOG_ERR(( "received a UDP packet with a too large payload (%u)", frame.len ));

  /* Check if we have space in the fill ring to free the frame */

  fd_xdp_ring_t * fill_ring  = &xsk->ring_fr;
  uint            fill_depth = fill_ring->depth;
  uint            fill_mask  = fill_depth-1U;
  ulong           frame_mask = FD_NET_MTU - 1UL;
  uint            fill_prod  = FD_VOLATILE_CONST( *fill_ring->prod );
  uint            fill_cons  = FD_VOLATILE_CONST( *fill_ring->cons );

  if( FD_UNLIKELY( (int)(fill_prod-fill_cons) >= (int)fill_depth ) ) {
    ctx->metrics.rx_fill_blocked_cnt++;
    return; /* blocked */
  }

  /* Pass it to the receive handler */

  uint freed_chunk = UINT_MAX;
  net_rx_packet( ctx, stem, frame.addr, frame.len, &freed_chunk );

  FD_COMPILER_MFENCE();
  FD_VOLATILE( *rx_ring->cons ) = rx_ring->cached_cons = rx_seq+1U;

  /* If this mcache publish shadowed a previous publish, mark the old
     frame as free. */

  if( FD_LIKELY( freed_chunk!=UINT_MAX ) ) {
    if( FD_UNLIKELY( ( freed_chunk < ctx->umem_chunk0 ) |
                     ( freed_chunk > ctx->umem_wmark ) ) ) {
      FD_LOG_ERR(( "mcache corruption detected: chunk=%u chunk0=%u wmark=%u",
                   freed_chunk, ctx->umem_chunk0, ctx->umem_wmark ));
    }
    ulong freed_off = (freed_chunk - ctx->umem_chunk0)<<FD_CHUNK_LG_SZ;
    fill_ring->frame_ring[ fill_prod&fill_mask ] = freed_off & (~frame_mask);
    FD_VOLATILE( *fill_ring->prod ) = fill_ring->cached_prod = fill_prod+1U;
  }

}

/* before_credit is called every loop iteration. */

static void
before_credit( fd_net_ctx_t *      ctx,
               fd_stem_context_t * stem,
               int *               charge_busy ) {
  /* A previous send attempt was overrun.  A corrupt copy of the packet
     was placed into an XDP frame, but the frame was not yet submitted
     to the TX ring.  Return the tx buffer to the free list. */

  if( ctx->tx_op.frame ) {
    *charge_busy = 1;
    fd_net_free_ring_t * free      = &ctx->free_tx;
    ulong                alloc_seq = free->prod;
    free->queue[ alloc_seq % free->depth ] = (ulong)ctx->tx_op.frame;
    free->prod = fd_seq_inc( alloc_seq, 1UL );
    ctx->tx_op.frame = NULL;
  }

  /* Check if new packets are available or if TX frames are free again
     (Round-robin through sockets) */

  uint       rr_idx = ctx->rr_idx;
  fd_xsk_t * rr_xsk = &ctx->xsk[ rr_idx ];
  ctx->rr_idx++;
  ctx->rr_idx = fd_uint_if( ctx->rr_idx>=ctx->xsk_cnt, 0, ctx->rr_idx );

  net_tx_periodic_wakeup( ctx, rr_idx, fd_tickcount(), charge_busy );

  uint rx_cons = rr_xsk->ring_rx.cached_cons;
  uint rx_prod = FD_VOLATILE_CONST( *rr_xsk->ring_rx.prod );
  if( rx_cons!=rx_prod ) {
    *charge_busy = 1;
    rr_xsk->ring_rx.cached_prod = rx_prod;
    net_rx_event( ctx, stem, rr_xsk, rx_cons );
  } else {
    net_rx_wakeup( ctx, rr_xsk, charge_busy );
  }

  uint comp_cons = FD_VOLATILE_CONST( *rr_xsk->ring_cr.cons );
  uint comp_prod = FD_VOLATILE_CONST( *rr_xsk->ring_cr.prod );
  if( comp_cons!=comp_prod ) {
    *charge_busy = 1;
    rr_xsk->ring_cr.cached_prod = comp_prod;
    net_comp_event( ctx, rr_xsk, comp_cons );
  }

}

/* net_xsk_bootstrap assigns UMEM frames to the FILL ring. */

static ulong
net_xsk_bootstrap( fd_net_ctx_t * ctx,
                   uint           xsk_idx,
                   ulong          frame_off ) {
  fd_xsk_t * xsk = &ctx->xsk[ xsk_idx ];

  ulong const frame_sz  = FD_NET_MTU;
  ulong const fr_depth  = ctx->xsk[ xsk_idx ].ring_fr.depth/2UL;

  fd_xdp_ring_t * fill      = &xsk->ring_fr;
  uint            fill_prod = fill->cached_prod;
  for( ulong j=0UL; j<fr_depth; j++ ) {
    fill->frame_ring[ j ] = frame_off;
    frame_off += frame_sz;
  }
  FD_VOLATILE( *fill->prod ) = fill->cached_prod = fill_prod + (uint)fr_depth;

  return frame_off;
}
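
/* Worked example (assuming FD_NET_MTU==2048): with a fill ring of
   depth 2048, the loop above hands the kernel 1024 frame offsets
   spanning 2 MiB of UMEM and returns the offset just past the last
   frame so the caller can keep carving UMEM for the next XSK.  Only
   half the ring is seeded; the remaining slots take frames recycled at
   runtime by net_rx_event. */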
     948             : 
     949             : /* FIXME source MAC address from netlnk tile instead */
     950             : 
     951             : static void
     952             : mac_address( const char * interface,
     953           0 :              uchar *      mac ) {
     954           0 :   int fd = socket( AF_INET, SOCK_DGRAM, 0 );
     955           0 :   struct ifreq ifr;
     956           0 :   ifr.ifr_addr.sa_family = AF_INET;
     957           0 :   strncpy( ifr.ifr_name, interface, IFNAMSIZ );
     958           0 :   if( FD_UNLIKELY( ioctl( fd, SIOCGIFHWADDR, &ifr ) ) )
     959           0 :     FD_LOG_ERR(( "could not get MAC address of interface `%s`: (%i-%s)", interface, errno, fd_io_strerror( errno ) ));
     960           0 :   if( FD_UNLIKELY( close(fd) ) )
     961           0 :     FD_LOG_ERR(( "could not close socket (%i-%s)", errno, fd_io_strerror( errno ) ));
     962           0 :   fd_memcpy( mac, ifr.ifr_hwaddr.sa_data, 6 );
     963           0 : }
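/* A hedged sketch of one alternative to the ioctl above: read the
   address from sysfs, which needs no socket.  (Function name and error
   convention here are illustrative only.) */

#include <stdio.h>

static int
mac_address_sysfs( char const * interface,
                   uchar        mac[6] ) {
  char path[ IFNAMSIZ+32 ];
  snprintf( path, sizeof(path), "/sys/class/net/%s/address", interface );
  FILE * f = fopen( path, "r" );                 /* e.g. /sys/class/net/eth0/address */
  if( FD_UNLIKELY( !f ) ) return -1;
  int n = fscanf( f, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
                  &mac[0], &mac[1], &mac[2], &mac[3], &mac[4], &mac[5] );
  fclose( f );
  return n==6 ? 0 : -1;                          /* 0 on success */
}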
     964             : 
     965             : /* privileged_init does the following initialization steps:
     966             : 
     967             :    - Create an AF_XDP socket
     968             :    - Map XDP metadata rings
     969             :    - Register UMEM data region with socket
     970             :    - Insert AF_XDP socket into xsk_map
     971             : 
      972             :    Net tile 0 also runs fd_xdp_install and repeats the above steps for
      973             :    the loopback device (unless the main interface is already loopback).
     974             : 
     975             :    Kernel object references:
     976             : 
     977             :      BPF_LINK file descriptor
     978             :       |
     979             :       +-> XDP program installation on NIC
     980             :       |    |
     981             :       |    +-> XDP program <-- BPF_PROG file descriptor (prog_fd)
     982             :       |
     983             :       +-> XSKMAP object <-- BPF_MAP file descriptor (xsk_map) */
     984             : 
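/* For orientation, a condensed sketch (not the fd_xsk implementation;
   error handling and most ring mappings elided) of the raw kernel API
   that fd_xsk_init and fd_xsk_activate wrap: */

#include <linux/bpf.h>
#include <sys/mman.h>
#include <sys/syscall.h>

static int
xsk_sketch( void * umem_mem, ulong umem_sz,
            uint   if_idx,   uint  queue_id,
            int    xsk_map_fd ) {
  int fd = socket( AF_XDP, SOCK_RAW, 0 );                        /* create AF_XDP socket */

  struct xdp_umem_reg umem = { .addr=(ulong)umem_mem, .len=umem_sz, .chunk_size=2048U };
  setsockopt( fd, SOL_XDP, XDP_UMEM_REG, &umem, sizeof(umem) );  /* register UMEM region */

  uint depth = 1024U;                                            /* example ring depth */
  setsockopt( fd, SOL_XDP, XDP_UMEM_FILL_RING,       &depth, sizeof(depth) );
  setsockopt( fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &depth, sizeof(depth) );
  setsockopt( fd, SOL_XDP, XDP_RX_RING,              &depth, sizeof(depth) );
  setsockopt( fd, SOL_XDP, XDP_TX_RING,              &depth, sizeof(depth) );

  struct xdp_mmap_offsets off;                                   /* map XDP metadata rings */
  socklen_t off_sz = sizeof(off);
  getsockopt( fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &off_sz );
  void * rx_ring = mmap( NULL, off.rx.desc + depth*sizeof(struct xdp_desc),
                         PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE,
                         fd, XDP_PGOFF_RX_RING );
  (void)rx_ring;                                                 /* fr/cr/tx rings mapped analogously */

  struct sockaddr_xdp sa = { .sxdp_family=AF_XDP, .sxdp_ifindex=if_idx,
                             .sxdp_queue_id=queue_id, .sxdp_flags=XDP_COPY };
  bind( fd, (struct sockaddr *)&sa, sizeof(sa) );                /* attach to NIC queue */

  union bpf_attr attr = { .map_fd=(uint)xsk_map_fd,
                          .key  =(ulong)&queue_id,
                          .value=(ulong)&fd };
  syscall( SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr) );  /* insert into XSKMAP */
  return fd;
}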
     985             : static void
     986             : privileged_init( fd_topo_t *      topo,
     987           0 :                  fd_topo_tile_t * tile ) {
     988           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
     989             : 
     990           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     991           0 :   fd_net_ctx_t * ctx     = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_net_ctx_t), sizeof(fd_net_ctx_t) );
      992           0 :   ulong *        free_tx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ulong), tile->net.free_ring_depth * sizeof(ulong) );
     993             : 
     994           0 :   fd_memset( ctx, 0, sizeof(fd_net_ctx_t) );
     995             : 
     996           0 :   uint if_idx = if_nametoindex( tile->net.interface );
     997           0 :   if( FD_UNLIKELY( !if_idx ) ) FD_LOG_ERR(( "if_nametoindex(%s) failed", tile->net.interface ));
     998             : 
     999           0 :   mac_address( tile->net.interface, ctx->src_mac_addr );
    1000             : 
    1001             :   /* Load up dcache containing UMEM */
    1002             : 
     1003           0 :   void * const dcache_mem  = fd_topo_obj_laddr( topo, tile->net.umem_dcache_obj_id );
     1004           0 :   void * const umem_dcache = fd_dcache_join( dcache_mem );
     1005           0 :   if( FD_UNLIKELY( !umem_dcache ) ) FD_LOG_ERR(( "Failed to join UMEM dcache" ));
     1006           0 :   ulong  const umem_dcache_data_sz = fd_dcache_data_sz( umem_dcache );
     1007           0 :   ulong  const umem_frame_sz       = 2048UL;
     1008             : 
     1009             :   /* Left shrink UMEM region to be 4096 byte aligned */
     1010             : 
     1011           0 :   void * const umem_frame0 = (void *)fd_ulong_align_up( (ulong)umem_dcache, 4096UL );
     1012           0 :   ulong        umem_sz     = umem_dcache_data_sz - ((ulong)umem_frame0 - (ulong)umem_dcache);
     1013           0 :   umem_sz = fd_ulong_align_dn( umem_sz, umem_frame_sz );
     1014             : 
     1015             :   /* Derive chunk bounds */
     1016             : 
     1017           0 :   void * const umem_base   = fd_wksp_containing( dcache_mem );
     1018           0 :   if( FD_UNLIKELY( !umem_base ) ) FD_LOG_ERR(( "UMEM dcache is not in a workspace" ));
     1019           0 :   ulong  const umem_chunk0 = ( (ulong)umem_frame0 - (ulong)umem_base )>>FD_CHUNK_LG_SZ;
     1020           0 :   ulong  const umem_wmark  = umem_chunk0 + ( ( umem_sz-umem_frame_sz )>>FD_CHUNK_LG_SZ );
     1021           0 :   if( FD_UNLIKELY( umem_chunk0>UINT_MAX || umem_wmark>UINT_MAX || umem_chunk0>umem_wmark ) ) {
     1022           0 :     FD_LOG_ERR(( "Calculated invalid UMEM bounds [%lu,%lu]", umem_chunk0, umem_wmark ));
     1023           0 :   }
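  /* Worked example with hypothetical numbers: if umem_dcache is
     0x10000800 and umem_dcache_data_sz is 0x10000, then umem_frame0 is
     0x10001000 and umem_sz is 0xF800 (31 full 2048 byte frames).
     Chunks are 64 byte units relative to the workspace base
     (FD_CHUNK_LG_SZ is 6), so fd_chunk_to_laddr( umem_base,
     umem_chunk0 ) recovers umem_frame0. */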
    1025             : 
    1026           0 :   ctx->umem_frame0 = umem_frame0;
    1027           0 :   ctx->umem_sz     = umem_sz;
    1028           0 :   ctx->umem_chunk0 = (uint)umem_chunk0;
    1029           0 :   ctx->umem_wmark  = (uint)umem_wmark;
    1030             : 
    1031           0 :   ctx->free_tx.queue = free_tx;
    1032           0 :   ctx->free_tx.depth = tile->net.xdp_tx_queue_size;
    1033             : 
    1034             :   /* Create and install XSKs */
    1035             : 
    1036           0 :   fd_xsk_params_t params0 = {
    1037           0 :     .if_idx      = if_idx,
    1038           0 :     .if_queue_id = (uint)tile->kind_id,
    1039             : 
     1040             :     /* Some kernels produce EOPNOTSUPP errors on sendto calls when
    1041             :        starting up without either XDP_ZEROCOPY or XDP_COPY
    1042             :        (e.g. 5.14.0-503.23.1.el9_5 with i40e) */
    1043           0 :     .bind_flags  = tile->net.zero_copy ? XDP_ZEROCOPY : XDP_COPY,
    1044             : 
    1045           0 :     .fr_depth  = tile->net.xdp_rx_queue_size*2,
    1046           0 :     .rx_depth  = tile->net.xdp_rx_queue_size,
    1047           0 :     .cr_depth  = tile->net.xdp_tx_queue_size,
    1048           0 :     .tx_depth  = tile->net.xdp_tx_queue_size,
    1049             : 
    1050           0 :     .umem_addr = umem_frame0,
    1051           0 :     .frame_sz  = umem_frame_sz,
    1052           0 :     .umem_sz   = umem_sz
    1053           0 :   };
    1054             : 
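  /* The magic descriptor numbers below are assumed to be fixed,
     well-known values at which the boot process passed this tile the
     XSKMAP and BPF link file descriptors. */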
    1055           0 :   int xsk_map_fd = 123462;
    1056           0 :   ctx->prog_link_fds[ 0 ] = 123463;
    1057             :   /* Init XSK */
    1058           0 :   if( FD_UNLIKELY( !fd_xsk_init( &ctx->xsk[ 0 ], &params0 ) ) )       FD_LOG_ERR(( "failed to bind xsk for net tile %lu", tile->kind_id ));
    1059           0 :   if( FD_UNLIKELY( !fd_xsk_activate( &ctx->xsk[ 0 ], xsk_map_fd ) ) ) FD_LOG_ERR(( "failed to activate xsk for net tile %lu", tile->kind_id ));
    1060           0 :   ctx->xsk_cnt = 1;
    1061             : 
    1062           0 :   if( FD_UNLIKELY( fd_sandbox_gettid()==fd_sandbox_getpid() ) ) {
     1063             :     /* Kind of gross... in single-threaded mode we don't want to close the xsk_map_fd
     1064             :        since it's shared with other net tiles.  Just check for that by seeing if we
     1065             :        are the only thread in the process. */
    1066           0 :     if( FD_UNLIKELY( -1==close( xsk_map_fd ) ) )                     FD_LOG_ERR(( "close(%d) failed (%d-%s)", xsk_map_fd, errno, fd_io_strerror( errno ) ));
    1067           0 :   }
    1068             : 
    1069             :   /* Networking tile at index 0 also binds to loopback (only queue 0 available on lo) */
    1070             : 
    1071           0 :   if( FD_UNLIKELY( strcmp( tile->net.interface, "lo" ) && !tile->kind_id ) ) {
    1072           0 :     ctx->xsk_cnt = 2;
    1073             : 
    1074           0 :     ushort udp_port_candidates[] = {
    1075           0 :       (ushort)tile->net.legacy_transaction_listen_port,
    1076           0 :       (ushort)tile->net.quic_transaction_listen_port,
    1077           0 :       (ushort)tile->net.shred_listen_port,
    1078           0 :       (ushort)tile->net.gossip_listen_port,
    1079           0 :       (ushort)tile->net.repair_intake_listen_port,
    1080           0 :       (ushort)tile->net.repair_serve_listen_port,
    1081           0 :     };
    1082             : 
    1083           0 :     uint lo_idx = if_nametoindex( "lo" );
    1084           0 :     if( FD_UNLIKELY( !lo_idx ) ) FD_LOG_ERR(( "if_nametoindex(lo) failed" ));
    1085             : 
    1086             :     /* FIXME move this to fd_topo_run */
    1087           0 :     fd_xdp_fds_t lo_fds = fd_xdp_install( lo_idx,
    1088           0 :                                           sizeof(udp_port_candidates)/sizeof(udp_port_candidates[0]),
    1089           0 :                                           udp_port_candidates,
    1090           0 :                                           "skb" );
    1091             : 
    1092           0 :     ctx->prog_link_fds[ 1 ] = lo_fds.prog_link_fd;
    1093             :     /* init xsk 1 */
    1094           0 :     fd_xsk_params_t params1 = params0;
    1095           0 :     params1.if_idx      = lo_idx; /* probably always 1 */
    1096           0 :     params1.if_queue_id = 0;
    1097           0 :     params1.bind_flags  = 0;
    1098           0 :     if( FD_UNLIKELY( !fd_xsk_init( &ctx->xsk[ 1 ], &params1 ) ) )              FD_LOG_ERR(( "failed to bind lo_xsk" ));
    1099           0 :     if( FD_UNLIKELY( !fd_xsk_activate( &ctx->xsk[ 1 ], lo_fds.xsk_map_fd ) ) ) FD_LOG_ERR(( "failed to activate lo_xsk" ));
     1100           0 :     if( FD_UNLIKELY( -1==close( lo_fds.xsk_map_fd ) ) )                        FD_LOG_ERR(( "close(%d) failed (%d-%s)", lo_fds.xsk_map_fd, errno, fd_io_strerror( errno ) ));
    1101           0 :   }
    1102             : 
    1103           0 :   double tick_per_ns = fd_tempo_tick_per_ns( NULL );
    1104           0 :   ctx->xdp_stats_interval_ticks = (long)( FD_XDP_STATS_INTERVAL_NS * tick_per_ns );
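  /* e.g. on a hypothetical 3 GHz invariant TSC, tick_per_ns is ~3.0,
     so the 11 ms stats interval converts to ~33e6 ticks */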
    1105             : 
    1106           0 :   ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
    1107           0 :   if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
    1108           0 :     FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
    1109           0 : }
    1110             : 
    1111             : static void
    1112             : unprivileged_init( fd_topo_t *      topo,
    1113           0 :                    fd_topo_tile_t * tile ) {
    1114           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
    1115           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
    1116           0 :   fd_net_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_net_ctx_t), sizeof(fd_net_ctx_t) );
    1117           0 :   FD_TEST( ctx->xsk_cnt!=0 );
    1118             : 
    1119           0 :   ctx->net_tile_id  = (uint)tile->kind_id;
    1120           0 :   ctx->net_tile_cnt = (uint)fd_topo_tile_name_cnt( topo, tile->name );
    1121             : 
    1122           0 :   ctx->shred_listen_port              = tile->net.shred_listen_port;
    1123           0 :   ctx->quic_transaction_listen_port   = tile->net.quic_transaction_listen_port;
    1124           0 :   ctx->legacy_transaction_listen_port = tile->net.legacy_transaction_listen_port;
    1125           0 :   ctx->gossip_listen_port             = tile->net.gossip_listen_port;
    1126           0 :   ctx->repair_intake_listen_port      = tile->net.repair_intake_listen_port;
    1127           0 :   ctx->repair_serve_listen_port       = tile->net.repair_serve_listen_port;
    1128             : 
     1129             :   /* Put a bound on chunks we read from the input, to make sure they
     1130             :      are within the data region of the workspace. */
    1131             : 
    1132           0 :   if( FD_UNLIKELY( !tile->in_cnt ) ) FD_LOG_ERR(( "net tile in link cnt is zero" ));
    1133           0 :   if( FD_UNLIKELY( tile->in_cnt>MAX_NET_INS ) ) FD_LOG_ERR(( "net tile in link cnt %lu exceeds MAX_NET_INS %lu", tile->in_cnt, MAX_NET_INS ));
     1134           0 :   FD_TEST( tile->in_cnt<=MAX_NET_INS );
    1135           0 :   for( ulong i=0UL; i<tile->in_cnt; i++ ) {
    1136           0 :     fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
    1137           0 :     if( FD_UNLIKELY( link->mtu!=FD_NET_MTU ) ) FD_LOG_ERR(( "net tile in link does not have a normal MTU" ));
    1138             : 
    1139           0 :     ctx->in[ i ].mem    = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
    1140           0 :     ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
    1141           0 :     ctx->in[ i ].wmark  = fd_dcache_compact_wmark( ctx->in[ i ].mem, link->dcache, link->mtu );
    1142           0 :   }
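  /* These bounds are enforced on the TX ingest hot path; a sketch of
     the check as it would appear in the during_frag callback
     (illustrative, the actual callback lives earlier in this file):

       if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 ||
                        chunk>ctx->in[ in_idx ].wmark ) )
         FD_LOG_ERR(( "chunk %lu out of bounds [%lu,%lu]", chunk,
                      ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
  */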
    1143             : 
     1144           0 :   for( ulong i=0UL; i<tile->out_cnt; i++ ) {
     1145           0 :     fd_topo_link_t * out_link = &topo->links[ tile->out_link_id[ i ] ];
    1146           0 :     if( strcmp( out_link->name, "net_quic" ) == 0 ) {
    1147           0 :       fd_topo_link_t * quic_out = out_link;
    1148           0 :       ctx->quic_out->mcache = quic_out->mcache;
    1149           0 :       ctx->quic_out->sync   = fd_mcache_seq_laddr( ctx->quic_out->mcache );
    1150           0 :       ctx->quic_out->depth  = fd_mcache_depth( ctx->quic_out->mcache );
    1151           0 :       ctx->quic_out->seq    = fd_mcache_seq_query( ctx->quic_out->sync );
    1152           0 :     } else if( strcmp( out_link->name, "net_shred" ) == 0 ) {
    1153           0 :       fd_topo_link_t * shred_out = out_link;
    1154           0 :       ctx->shred_out->mcache = shred_out->mcache;
    1155           0 :       ctx->shred_out->sync   = fd_mcache_seq_laddr( ctx->shred_out->mcache );
    1156           0 :       ctx->shred_out->depth  = fd_mcache_depth( ctx->shred_out->mcache );
    1157           0 :       ctx->shred_out->seq    = fd_mcache_seq_query( ctx->shred_out->sync );
    1158           0 :     } else if( strcmp( out_link->name, "net_gossip" ) == 0 ) {
    1159           0 :       fd_topo_link_t * gossip_out = out_link;
    1160           0 :       ctx->gossip_out->mcache = gossip_out->mcache;
    1161           0 :       ctx->gossip_out->sync   = fd_mcache_seq_laddr( ctx->gossip_out->mcache );
    1162           0 :       ctx->gossip_out->depth  = fd_mcache_depth( ctx->gossip_out->mcache );
    1163           0 :       ctx->gossip_out->seq    = fd_mcache_seq_query( ctx->gossip_out->sync );
    1164           0 :     } else if( strcmp( out_link->name, "net_repair" ) == 0 ) {
    1165           0 :       fd_topo_link_t * repair_out = out_link;
    1166           0 :       ctx->repair_out->mcache = repair_out->mcache;
    1167           0 :       ctx->repair_out->sync   = fd_mcache_seq_laddr( ctx->repair_out->mcache );
    1168           0 :       ctx->repair_out->depth  = fd_mcache_depth( ctx->repair_out->mcache );
    1169           0 :       ctx->repair_out->seq    = fd_mcache_seq_query( ctx->repair_out->sync );
    1170           0 :     } else if( strcmp( out_link->name, "net_netlnk" ) == 0 ) {
    1171           0 :       fd_topo_link_t * netlink_out = out_link;
    1172           0 :       ctx->neigh4_solicit->mcache = netlink_out->mcache;
    1173           0 :       ctx->neigh4_solicit->depth  = fd_mcache_depth( ctx->neigh4_solicit->mcache );
    1174           0 :       ctx->neigh4_solicit->seq    = fd_mcache_seq_query( fd_mcache_seq_laddr( ctx->neigh4_solicit->mcache ) );
    1175           0 :     } else {
    1176           0 :       FD_LOG_ERR(( "unrecognized out link `%s`", out_link->name ));
    1177           0 :     }
    1178           0 :   }
    1179             : 
     1180             :   /* Check that each listen port we configured has a corresponding out link. */
    1181           0 :   if( FD_UNLIKELY( ctx->shred_listen_port!=0 && ctx->shred_out->mcache==NULL ) ) {
    1182           0 :     FD_LOG_ERR(( "shred listen port set but no out link was found" ));
    1183           0 :   } else if( FD_UNLIKELY( ctx->quic_transaction_listen_port!=0 && ctx->quic_out->mcache==NULL ) ) {
    1184           0 :     FD_LOG_ERR(( "quic transaction listen port set but no out link was found" ));
    1185           0 :   } else if( FD_UNLIKELY( ctx->legacy_transaction_listen_port!=0 && ctx->quic_out->mcache==NULL ) ) {
    1186           0 :     FD_LOG_ERR(( "legacy transaction listen port set but no out link was found" ));
    1187           0 :   } else if( FD_UNLIKELY( ctx->gossip_listen_port!=0 && ctx->gossip_out->mcache==NULL ) ) {
    1188           0 :     FD_LOG_ERR(( "gossip listen port set but no out link was found" ));
    1189           0 :   } else if( FD_UNLIKELY( ctx->repair_intake_listen_port!=0 && ctx->repair_out->mcache==NULL ) ) {
    1190           0 :     FD_LOG_ERR(( "repair intake port set but no out link was found" ));
    1191           0 :   } else if( FD_UNLIKELY( ctx->repair_serve_listen_port!=0 && ctx->repair_out->mcache==NULL ) ) {
    1192           0 :     FD_LOG_ERR(( "repair serve listen port set but no out link was found" ));
    1193           0 :   } else if( FD_UNLIKELY( ctx->neigh4_solicit->mcache==NULL ) ) {
    1194           0 :     FD_LOG_ERR(( "netlink request link not found" ));
    1195           0 :   }
    1196             : 
    1197           0 :   for( uint j=0U; j<2U; j++ ) {
    1198           0 :     ctx->tx_flusher[ j ].pending_wmark         = (ulong)( (double)tile->net.xdp_tx_queue_size * 0.7 );
    1199           0 :     ctx->tx_flusher[ j ].tail_flush_backoff    = (long)( (double)tile->net.tx_flush_timeout_ns * fd_tempo_tick_per_ns( NULL ) );
    1200           0 :     ctx->tx_flusher[ j ].next_tail_flush_ticks = LONG_MAX;
    1201           0 :   }
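  /* e.g. with a hypothetical xdp_tx_queue_size of 1024, the flusher
     forces a kernel wakeup once ~716 TX frames are pending */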
    1202             : 
    1203             :   /* Join netbase objects */
    1204           0 :   ctx->fib_local = fd_fib4_join( fd_topo_obj_laddr( topo, tile->net.fib4_local_obj_id ) );
    1205           0 :   ctx->fib_main  = fd_fib4_join( fd_topo_obj_laddr( topo, tile->net.fib4_main_obj_id  ) );
    1206           0 :   if( FD_UNLIKELY( !ctx->fib_local || !ctx->fib_main ) ) FD_LOG_ERR(( "fd_fib4_join failed" ));
    1207           0 :   if( FD_UNLIKELY( !fd_neigh4_hmap_join(
    1208           0 :       ctx->neigh4,
    1209           0 :       fd_topo_obj_laddr( topo, tile->net.neigh4_obj_id ),
    1210           0 :       fd_topo_obj_laddr( topo, tile->net.neigh4_ele_obj_id ) ) ) ) {
    1211           0 :     FD_LOG_ERR(( "fd_neigh4_hmap_join failed" ));
    1212           0 :   }
    1213             : 
    1214             :   /* Initialize TX free ring */
    1215             : 
    1216           0 :   ulong const frame_sz  = 2048UL;
    1217           0 :   ulong       frame_off = 0UL;
    1218           0 :   ulong const tx_depth  = ctx->free_tx.depth;
    1219           0 :   for( ulong j=0; j<tx_depth; j++ ) {
    1220           0 :     ctx->free_tx.queue[ j ] = (ulong)ctx->umem_frame0 + frame_off;
    1221           0 :     frame_off += frame_sz;
    1222           0 :   }
    1223           0 :   ctx->free_tx.prod = tx_depth;
    1224             : 
    1225             :   /* Initialize RX mcache chunks */
    1226             : 
     1227           0 :   for( ulong i=0UL; i<tile->out_cnt; i++ ) {
     1228           0 :     fd_topo_link_t * out_link = &topo->links[ tile->out_link_id[ i ] ];
    1229           0 :     fd_frag_meta_t * mcache   = out_link->mcache;
    1230           0 :     for( ulong j=0UL; j<fd_mcache_depth( mcache ); j++ ) {
    1231           0 :       mcache[ j ].chunk = (uint)( ctx->umem_chunk0 + (frame_off>>FD_CHUNK_LG_SZ) );
    1232           0 :       frame_off += frame_sz;
    1233           0 :     }
    1234           0 :   }
    1235             : 
    1236             :   /* Initialize FILL ring */
    1237             : 
    1238           0 :   int _charge_busy = 0;
    1239           0 :   for( uint j=0U; j<ctx->xsk_cnt; j++ ) {
    1240           0 :     frame_off = net_xsk_bootstrap( ctx, j, frame_off );
    1241           0 :     net_rx_wakeup( ctx, &ctx->xsk[ j ], &_charge_busy );
    1242           0 :     net_tx_wakeup( ctx, &ctx->xsk[ j ], &_charge_busy );
    1243           0 :   }
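  /* At this point frame_off equals the total UMEM bytes handed out:
     free_tx.depth TX frames, one frame per RX mcache line, and
     fr_depth/2 FILL frames per XSK, each frame_sz (2048) bytes.  The
     check below verifies that this budget fits within the UMEM
     region. */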
    1244             : 
    1245           0 :   if( FD_UNLIKELY( frame_off > ctx->umem_sz ) ) {
    1246           0 :     FD_LOG_ERR(( "UMEM is too small" ));
    1247           0 :   }
    1248           0 : }
    1249             : 
    1250             : static ulong
    1251             : populate_allowed_seccomp( fd_topo_t const *      topo,
    1252             :                           fd_topo_tile_t const * tile,
    1253             :                           ulong                  out_cnt,
    1254           0 :                           struct sock_filter *   out ) {
    1255           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
    1256           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
    1257           0 :   fd_net_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_net_ctx_t ), sizeof( fd_net_ctx_t ) );
    1258             : 
     1259             :   /* A bit of a hack: if there is no loopback XSK for this tile, we still need to pass
     1260             :      two "allow" FD arguments to the net policy, so we just make them both the same. */
    1261           0 :   int allow_fd2 = ctx->xsk_cnt>1UL ? ctx->xsk[ 1 ].xsk_fd : ctx->xsk[ 0 ].xsk_fd;
    1262           0 :   FD_TEST( ctx->xsk[ 0 ].xsk_fd >= 0 && allow_fd2 >= 0 );
    1263           0 :   populate_sock_filter_policy_xdp( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->xsk[ 0 ].xsk_fd, (uint)allow_fd2 );
    1264           0 :   return sock_filter_policy_xdp_instr_cnt;
    1265           0 : }
    1266             : 
    1267             : static ulong
    1268             : populate_allowed_fds( fd_topo_t const *      topo,
    1269             :                       fd_topo_tile_t const * tile,
    1270             :                       ulong                  out_fds_cnt,
    1271           0 :                       int *                  out_fds ) {
    1272           0 :   void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
    1273           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
    1274           0 :   fd_net_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_net_ctx_t ), sizeof( fd_net_ctx_t ) );
    1275             : 
    1276           0 :   if( FD_UNLIKELY( out_fds_cnt<6UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
    1277             : 
    1278           0 :   ulong out_cnt = 0UL;
    1279             : 
    1280           0 :   out_fds[ out_cnt++ ] = 2; /* stderr */
    1281           0 :   if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
    1282           0 :     out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
    1283             : 
    1284           0 :                                       out_fds[ out_cnt++ ] = ctx->xsk[ 0 ].xsk_fd;
    1285           0 :                                       out_fds[ out_cnt++ ] = ctx->prog_link_fds[ 0 ];
    1286           0 :   if( FD_LIKELY( ctx->xsk_cnt>1UL ) ) out_fds[ out_cnt++ ] = ctx->xsk[ 1 ].xsk_fd;
    1287           0 :   if( FD_LIKELY( ctx->xsk_cnt>1UL ) ) out_fds[ out_cnt++ ] = ctx->prog_link_fds[ 1 ];
    1288           0 :   return out_cnt;
    1289           0 : }
    1290             : 
    1291           0 : #define STEM_BURST (1UL)
    1292           0 : #define STEM_LAZY ((ulong)30e3) /* 30 us */
    1293             : 
    1294           0 : #define STEM_CALLBACK_CONTEXT_TYPE  fd_net_ctx_t
    1295           0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_net_ctx_t)
    1296             : 
    1297           0 : #define STEM_CALLBACK_METRICS_WRITE       metrics_write
    1298           0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
    1299           0 : #define STEM_CALLBACK_BEFORE_CREDIT       before_credit
    1300           0 : #define STEM_CALLBACK_BEFORE_FRAG         before_frag
    1301           0 : #define STEM_CALLBACK_DURING_FRAG         during_frag
    1302           0 : #define STEM_CALLBACK_AFTER_FRAG          after_frag
    1303             : 
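/* Including fd_stem.c after defining the STEM_* macros above
   instantiates this tile's run loop (stem_run) specialized with these
   callbacks and constants. */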
    1304             : #include "../../stem/fd_stem.c"
    1305             : 
    1306             : fd_topo_run_tile_t fd_tile_net = {
    1307             :   .name                     = "net",
    1308             :   .populate_allowed_seccomp = populate_allowed_seccomp,
    1309             :   .populate_allowed_fds     = populate_allowed_fds,
    1310             :   .scratch_align            = scratch_align,
    1311             :   .scratch_footprint        = scratch_footprint,
    1312             :   .privileged_init          = privileged_init,
    1313             :   .unprivileged_init        = unprivileged_init,
    1314             :   .run                      = stem_run,
    1315             : };

Generated by: LCOV version 1.14