LCOV - code coverage report
Current view: top level - waltz/xdp - fd_xsk.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 160 0.0 %
Date: 2025-03-20 12:08:36 Functions: 0 6 0.0 %

          Line data    Source code
       1             : #if !defined(__linux__)
       2             : #error "fd_xsk requires Linux operating system with XDP support"
       3             : #endif
       4             : 
       5             : #include <errno.h>
       6             : #include <stdio.h> /* snprintf */
       7             : #include <unistd.h>
       8             : #include <sys/mman.h> /* mmap */
       9             : #include <sys/types.h>
      10             : #include <sys/socket.h> /* sendto */
      11             : 
      12             : #include "../../util/log/fd_log.h"
      13             : #include "fd_xsk.h"
      14             : 
      15             : /* Join/leave *********************************************************/
      16             : 
      /* fd_xsk_mmap_offset_cstr returns a cstr describing the given offset
         param (6th argument of mmap(2)) assuming fd (5th param of mmap(2)) is
         an XSK file descriptor.

         The four XDP ring page offsets map to string literals.  Any other
         value is rendered as hex into a function-local static buffer.  That
         cstr is only valid until the next call that takes the fallback
         path, and the buffer is shared by every caller of this function. */
      static char const *
      fd_xsk_mmap_offset_cstr( long mmap_off ) {
        /* "0x" + up to 16 hex digits (64-bit value) + NUL terminator */
        static char buf[ 19UL ];

        switch( mmap_off ) {
        case XDP_PGOFF_RX_RING:              return "XDP_PGOFF_RX_RING";
        case XDP_PGOFF_TX_RING:              return "XDP_PGOFF_TX_RING";
        case XDP_UMEM_PGOFF_FILL_RING:       return "XDP_UMEM_PGOFF_FILL_RING";
        case XDP_UMEM_PGOFF_COMPLETION_RING: return "XDP_UMEM_PGOFF_COMPLETION_RING";
        default:                             break;
        }

        /* Bound taken from the buffer itself so declaration and bound
           cannot drift apart.  %lx takes an unsigned long. */
        snprintf( buf, sizeof(buf), "0x%lx", (unsigned long)mmap_off );
        return buf;
      }
      34             : 
      35             : /* fd_xsk_mmap_ring maps the given XSK ring into the local address space
      36             :    and populates fd_ring_desc_t.  Every successful call to this function
      37             :    should eventually be paired with a call to fd_xsk_munmap_ring(). */
      38             : static int
      39             : fd_xsk_mmap_ring( fd_xdp_ring_t * ring,
      40             :                   int             xsk_fd,
      41             :                   long            map_off,
      42             :                   ulong           elem_sz,
      43             :                   ulong           depth,
      44           0 :                   struct xdp_ring_offset const * ring_offset ) {
      45             :   /* TODO what is ring_offset->desc ? */
      46             :   /* TODO: mmap was originally called with MAP_POPULATE,
      47             :            but this symbol isn't available with this build */
      48             : 
      49             :   /* sanity check */
      50           0 :   if( depth > (ulong)UINT_MAX ) {
      51           0 :     return -1;
      52           0 :   }
      53             : 
      54           0 :   ulong map_sz = ring_offset->desc + depth*elem_sz;
      55             : 
      56           0 :   void * res = mmap( NULL, map_sz, PROT_READ|PROT_WRITE, MAP_SHARED, xsk_fd, map_off );
      57           0 :   if( FD_UNLIKELY( res==MAP_FAILED ) ) {
      58           0 :     FD_LOG_WARNING(( "mmap(NULL, %lu, PROT_READ|PROT_WRITE, MAP_SHARED, xsk_fd, %s) failed (%i-%s)",
      59           0 :                      map_sz, fd_xsk_mmap_offset_cstr( map_off ), errno, fd_io_strerror( errno ) ));
      60           0 :     return -1;
      61           0 :   }
      62             : 
      63             :   /* TODO add unit test asserting that cached prod/cons seq gets
      64             :           cleared on join */
      65           0 :   fd_memset( ring, 0, sizeof(fd_xdp_ring_t) );
      66             : 
      67           0 :   ring->mem    = res;
      68           0 :   ring->map_sz = map_sz;
      69           0 :   ring->depth  = (uint)depth;
      70           0 :   ring->ptr    = (void *)( (ulong)res + ring_offset->desc     );
      71           0 :   ring->flags  = (uint *)( (ulong)res + ring_offset->flags    );
      72           0 :   ring->prod   = (uint *)( (ulong)res + ring_offset->producer );
      73           0 :   ring->cons   = (uint *)( (ulong)res + ring_offset->consumer );
      74             : 
      75           0 :   return 0;
      76           0 : }
      77             : 
      78             : /* fd_xsk_munmap_ring unmaps the given XSK ring from the local address
      79             :    space and zeroes fd_ring_desc_t. */
      80             : static void
      81             : fd_xsk_munmap_ring( fd_xdp_ring_t * ring,
      82           0 :                     long             map_off ) {
      83           0 :   if( FD_UNLIKELY( !ring->mem ) ) return;
      84             : 
      85           0 :   void * mem = ring->mem;
      86           0 :   ulong  sz  = ring->map_sz;
      87             : 
      88           0 :   fd_memset( ring, 0, sizeof(fd_xdp_ring_t) );
      89             : 
      90           0 :   if( FD_UNLIKELY( 0!=munmap( mem, sz ) ) )
      91           0 :     FD_LOG_WARNING(( "munmap(%p, %lu) on %s ring failed (%i-%s)",
      92           0 :                      mem, sz, fd_xsk_mmap_offset_cstr( map_off ), errno, fd_io_strerror( errno ) ));
      93           0 : }
      94             : 
      95             : /* fd_xsk_cleanup undoes a (partial) join by releasing all active kernel
      96             :    objects, such as mapped memory regions and file descriptors.  Assumes
      97             :    that no join to `xsk` is currently being used. */
      98             : 
      99             : fd_xsk_t *
     100           0 : fd_xsk_fini( fd_xsk_t * xsk ) {
     101             :   /* Undo memory mappings */
     102             : 
     103           0 :   fd_xsk_munmap_ring( &xsk->ring_rx, XDP_PGOFF_RX_RING              );
     104           0 :   fd_xsk_munmap_ring( &xsk->ring_tx, XDP_PGOFF_TX_RING              );
     105           0 :   fd_xsk_munmap_ring( &xsk->ring_fr, XDP_UMEM_PGOFF_FILL_RING       );
     106           0 :   fd_xsk_munmap_ring( &xsk->ring_cr, XDP_UMEM_PGOFF_COMPLETION_RING );
     107             : 
     108             :   /* Release XSK */
     109             : 
     110           0 :   if( FD_LIKELY( xsk->xsk_fd>=0 ) ) {
     111             :     /* Clear XSK descriptors */
     112           0 :     fd_memset( &xsk->offsets, 0, sizeof(struct xdp_mmap_offsets) );
     113             :     /* Close XSK */
     114           0 :     close( xsk->xsk_fd );
     115           0 :     xsk->xsk_fd = -1;
     116           0 :   }
     117             : 
     118           0 :   return xsk;
     119           0 : }
     120             : 
     121             : /* fd_xsk_setup_umem: Initializes xdp_umem_reg and hooks up XSK with
     122             :    UMEM rings via setsockopt(). Retrieves xdp_mmap_offsets via
     123             :    getsockopt().  Returns 0 on success, -1 on failure. */
     124             : static int
     125             : fd_xsk_setup_umem( fd_xsk_t *              xsk,
     126           0 :                    fd_xsk_params_t const * params ) {
     127             : 
     128             :   /* Initialize xdp_umem_reg */
     129           0 :   struct xdp_umem_reg umem_reg = {
     130           0 :     .addr       = (ulong)params->umem_addr,
     131           0 :     .len        = params->umem_sz,
     132           0 :     .chunk_size = (uint)params->frame_sz,
     133           0 :   };
     134             : 
     135             :   /* Register UMEM region */
     136           0 :   int res;
     137           0 :   res = setsockopt( xsk->xsk_fd, SOL_XDP, XDP_UMEM_REG,
     138           0 :                     &umem_reg, sizeof(struct xdp_umem_reg) );
     139           0 :   if( FD_UNLIKELY( res!=0 ) ) {
     140           0 :     FD_LOG_WARNING(( "setsockopt(SOL_XDP,XDP_UMEM_REG(addr=%p,len=%lu,chunk_size=%lu)) failed (%i-%s)",
     141           0 :                      (void *)umem_reg.addr, (ulong)umem_reg.len, (ulong)umem_reg.chunk_size,
     142           0 :                      errno, fd_io_strerror( errno ) ));
     143           0 :     return -1;
     144           0 :   }
     145             : 
     146             :   /* Set ring frame counts */
     147           0 : # define FD_SET_XSK_RING_DEPTH(name, var)                                 \
     148           0 :     do {                                                                  \
     149           0 :       res = setsockopt( xsk->xsk_fd, SOL_XDP, name, &(var), 8UL );        \
     150           0 :       if( FD_UNLIKELY( res!=0 ) ) {                                       \
     151           0 :         FD_LOG_WARNING(( "setsockopt(SOL_XDP," #name ",%lu) failed (%i-%s)", \
     152           0 :                          var, errno, fd_io_strerror( errno ) ));          \
     153           0 :         return -1;                                                        \
     154           0 :       }                                                                   \
     155           0 :     } while(0)
     156           0 :   FD_SET_XSK_RING_DEPTH( XDP_UMEM_FILL_RING,       params->fr_depth );
     157           0 :   FD_SET_XSK_RING_DEPTH( XDP_RX_RING,              params->rx_depth );
     158           0 :   FD_SET_XSK_RING_DEPTH( XDP_TX_RING,              params->tx_depth );
     159           0 :   FD_SET_XSK_RING_DEPTH( XDP_UMEM_COMPLETION_RING, params->cr_depth );
     160           0 : # undef FD_SET_XSK_RING_DEPTH
     161             : 
     162             :   /* Request ring offsets */
     163           0 :   socklen_t offsets_sz = sizeof(struct xdp_mmap_offsets);
     164           0 :   res = getsockopt( xsk->xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS,
     165           0 :                     &xsk->offsets, &offsets_sz );
     166           0 :   if( FD_UNLIKELY( res!=0 ) ) {
     167           0 :     FD_LOG_WARNING(( "getsockopt(SOL_XDP, XDP_MMAP_OFFSETS) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     168           0 :     return -1;
     169           0 :   }
     170             : 
     171             :   /* OK */
     172           0 :   return 0;
     173           0 : }
     174             : 
     175             : /* fd_xsk_init: Creates and configures an XSK socket object, and
     176             :    attaches to a preinstalled XDP program.  The various steps are
     177             :    implemented in fd_xsk_setup_{...}. */
     178             : 
     179             : fd_xsk_t *
     180             : fd_xsk_init( fd_xsk_t *              xsk,
     181           0 :              fd_xsk_params_t const * params ) {
     182             : 
     183           0 :   if( FD_UNLIKELY( !xsk ) ) { FD_LOG_WARNING(( "NULL xsk" )); return NULL; }
     184           0 :   memset( xsk, 0, sizeof(fd_xsk_t) );
     185             : 
     186           0 :   if( FD_UNLIKELY( !params->if_idx ) ) { FD_LOG_WARNING(( "zero if_idx" )); return NULL; }
     187           0 :   if( FD_UNLIKELY( (!params->fr_depth) | (!params->rx_depth) |
     188           0 :                    (!params->tx_depth) | (!params->cr_depth) ) ) {
     189           0 :     FD_LOG_WARNING(( "invalid {fr,rx,tx,cr}_depth" ));
     190           0 :     return NULL;
     191           0 :   }
     192           0 :   if( FD_UNLIKELY( !params->umem_addr ) ) {
     193           0 :     FD_LOG_WARNING(( "NULL umem_addr" ));
     194           0 :     return NULL;
     195           0 :   }
     196           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)params->umem_addr, 4096UL ) ) ) {
     197           0 :     FD_LOG_WARNING(( "misaligned params->umem_addr" ));
     198           0 :     return NULL;
     199           0 :   }
     200           0 :   if( FD_UNLIKELY( !params->frame_sz || !fd_ulong_is_pow2( params->frame_sz ) ) ) {
     201           0 :     FD_LOG_WARNING(( "invalid frame_sz" ));
     202           0 :     return NULL;
     203           0 :   }
     204             : 
     205           0 :   xsk->if_idx      = params->if_idx;
     206           0 :   xsk->if_queue_id = params->if_queue_id;
     207             : 
     208             :   /* Create XDP socket (XSK) */
     209             : 
     210           0 :   xsk->xsk_fd = socket( AF_XDP, SOCK_RAW, 0 );
     211           0 :   if( FD_UNLIKELY( xsk->xsk_fd<0 ) ) {
     212           0 :     FD_LOG_WARNING(( "Failed to create XSK (%i-%s)", errno, fd_io_strerror( errno ) ));
     213           0 :     return NULL;
     214           0 :   }
     215             : 
     216             :   /* Associate UMEM region of fd_xsk_t with XSK via setsockopt() */
     217             : 
     218           0 :   if( FD_UNLIKELY( 0!=fd_xsk_setup_umem( xsk, params ) ) ) goto fail;
     219             : 
     220             :   /* Map XSK rings into local address space */
     221             : 
     222           0 :   if( FD_UNLIKELY( 0!=fd_xsk_mmap_ring( &xsk->ring_rx, xsk->xsk_fd, XDP_PGOFF_RX_RING,              sizeof(struct xdp_desc), params->rx_depth, &xsk->offsets.rx ) ) ) goto fail;
     223           0 :   if( FD_UNLIKELY( 0!=fd_xsk_mmap_ring( &xsk->ring_tx, xsk->xsk_fd, XDP_PGOFF_TX_RING,              sizeof(struct xdp_desc), params->tx_depth, &xsk->offsets.tx ) ) ) goto fail;
     224           0 :   if( FD_UNLIKELY( 0!=fd_xsk_mmap_ring( &xsk->ring_fr, xsk->xsk_fd, XDP_UMEM_PGOFF_FILL_RING,       sizeof(ulong),           params->fr_depth, &xsk->offsets.fr ) ) ) goto fail;
     225           0 :   if( FD_UNLIKELY( 0!=fd_xsk_mmap_ring( &xsk->ring_cr, xsk->xsk_fd, XDP_UMEM_PGOFF_COMPLETION_RING, sizeof(ulong),           params->cr_depth, &xsk->offsets.cr ) ) ) goto fail;
     226             : 
     227             :   /* Bind XSK to queue on network interface */
     228             : 
     229           0 :   uint flags = XDP_USE_NEED_WAKEUP | params->bind_flags;
     230           0 :   struct sockaddr_xdp sa = {
     231           0 :     .sxdp_family   = PF_XDP,
     232           0 :     .sxdp_ifindex  = xsk->if_idx,
     233           0 :     .sxdp_queue_id = xsk->if_queue_id,
     234             :     /* See extended commentary below for details on XDP_USE_NEED_WAKEUP
     235             :        flag. */
     236           0 :     .sxdp_flags    = (ushort)flags
     237           0 :   };
     238             : 
     239           0 :   char if_name[ IF_NAMESIZE ] = {0};
     240             : 
     241           0 :   if( FD_UNLIKELY( 0!=bind( xsk->xsk_fd, (void *)&sa, sizeof(struct sockaddr_xdp) ) ) ) {
     242           0 :     FD_LOG_WARNING(( "bind( PF_XDP, ifindex=%u (%s), queue_id=%u, flags=%x ) failed (%i-%s)",
     243           0 :                      xsk->if_idx, if_indextoname( xsk->if_idx, if_name ),
     244           0 :                      xsk->if_queue_id, flags,
     245           0 :                      errno, fd_io_strerror( errno ) ));
     246           0 :     goto fail;
     247           0 :   }
     248             : 
     249             :   /* We've seen that some popular Intel NICs seem to have a bug that
     250             :      prevents them from working in SKB mode with certain kernel
     251             :      versions.  We can identify them by sendto returning ENXIO or EINVAL
     252             :      in newer versions.  The core of the problem is that the kernel
     253             :      calls the generic ndo_bpf pointer instead of the driver-specific
     254             :      version.  This means that the driver's pointer to the BPF program
     255             :      never gets set, yet the driver's wakeup function gets called. */
     256           0 :   if( FD_UNLIKELY( -1==sendto( xsk->xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0 ) ) ) {
     257           0 :     if( FD_LIKELY( errno==ENXIO || errno==EINVAL ) ) {
     258           0 :       FD_LOG_ERR(( "xsk sendto failed xsk_fd=%d (%i-%s).  This likely indicates "
     259           0 :                    "a bug with your NIC driver.  Try switching XDP mode using "
     260           0 :                    "tiles.net.xdp_mode in the configuration TOML.\n"
     261           0 :                    "Certain Intel NICs with certain driver/kernel combinations "
     262           0 :                    "are known to exhibit this issue in skb mode but work in drv "
     263           0 :                    "mode.", xsk->xsk_fd, errno, fd_io_strerror( errno ) ));
     264           0 :     } else {
     265           0 :       FD_LOG_WARNING(( "xsk sendto failed xsk_fd=%d (%i-%s)", xsk->xsk_fd, errno, fd_io_strerror( errno ) ));
     266           0 :     }
     267           0 :   }
     268             : 
     269             :   /* XSK successfully configured.  Traffic will arrive in XSK after
     270             :      configuring an XDP program to forward packets via XDP_REDIRECT.
     271             :      This requires providing the XSK file descriptor to the program via
     272             :      XSKMAP and is done in a separate step. */
     273             : 
     274           0 :   FD_LOG_INFO(( "AF_XDP socket initialized: bind( PF_XDP, ifindex=%u (%s), queue_id=%u, flags=%x ) success",
     275           0 :                 xsk->if_idx, if_indextoname( xsk->if_idx, if_name ), xsk->if_queue_id, flags ));
     276             : 
     277           0 :   return xsk;
     278             : 
     279           0 : fail:
     280           0 :   fd_xsk_fini( xsk );
     281           0 :   return NULL;
     282           0 : }

Generated by: LCOV version 1.14