LCOV - code coverage report
Current view: top level - waltz/xdp - fd_xdp1.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 160 0.0 %
Date: 2025-08-05 05:04:49 Functions: 0 2 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "fd_xdp1.h"
       3             : 
       4             : #include "fd_xdp_license.h"
       5             : #include "../ebpf/fd_linux_bpf.h"
       6             : #include "../ebpf/fd_ebpf_asm.h"
       7             : 
       8             : #include <errno.h>
       9             : #include <unistd.h>
      10             : #include <net/if.h>
      11             : #include <sys/syscall.h>
      12             : #include <linux/bpf.h>
      13             : #include <linux/if_link.h>
      14             : 
      15             : /* Define some kernel uapi constants in case the user is compiling
      16             :    with older kernel headers.  This is especially a problem on Ubuntu
      17             :    20.04 which supports these functions, but doesn't have them in
      18             :    the default headers. */
      19             : 
      20             : #ifndef BPF_LINK_CREATE
      21           0 : #define BPF_LINK_CREATE (28)  /* bpf() command number; used by fd_xdp_install to attach the loaded program */
      22             : #endif
      23             :  /* NOTE(review): these guards only detect macros; if the system header declares the names as enum constants the fallback is always taken -- confirm that is intended */
      24             : #ifndef BPF_XDP
      25           0 : #define BPF_XDP (37)  /* attach_type value that fd_xdp_install stores in struct bpf_link_create */
      26             : #endif
      27             : 
      28             : struct __attribute__((aligned(8))) bpf_link_create {  /* argument block that fd_xdp_install type-puns and passes to bpf( BPF_LINK_CREATE, ... ) */
      29             :   uint prog_fd;         /* fd returned by BPF_PROG_LOAD for the XDP program */
      30             :   uint target_ifindex;  /* index of the network interface to attach to (if_idx in fd_xdp_install) */
      31             :   uint attach_type;     /* set to BPF_XDP by fd_xdp_install */
      32             :   uint flags;           /* XDP_FLAGS_* mode bit selected from the xdp_mode string, or 0 */
      33             : };
      34             : 
      35             : 
      36             : /*
      37             : 
      38             :   ┌─────────────────────────────────────────┐
      39             :   │   Packet Classification Control Flow    │
      40             :   |                                         |
      41             :   │       ┌────────┐                        │
      42             :   │       │Ethernet│                        │
      43             :   │       └───┬────┘                        │
      44             :   │           │                             │
      45             :   │       ┌───▼────┐                        │
      46             :   │       │ IPV4   │                        │
      47             :   │       └──┬───┬─┘                        │
      48             :   │          │   │                          │
      49             :   │          │   └────►──────┐              │
      50             :   │          │        │ GRE  │              │
      51             :   │          │        └───┬──┘              │
      52             :   │          │            │                 │
      53             :   │          │        ┌───▼────────┐        │
      54             :   │          │        │ Inner IPV4 │        │
      55             :   │          │        ├────────────┘        │
      56             :   │          │        │                     │
      57             :   │      ┌───▼──◄─────┘                     │
      58             :   │      │ UDP  │                           │
      59             :   │      └──┬───┘                           │
      60             :   |         │                               |
      61             :   |  ┌──────▼───────┐   ┌──────────────┐    |
      62             :   |  | LBL_REDIRECT |   |   LBL_PASS   |    |
      63             :   |  └──────────────┘   └──────────────┘    |
      64             :   └─────────────────────────────────────────┘
      65             : 
      66             :   fd_xdp_gen_program dynamically generates an eBPF bytecode program to
      67             :   classify incoming network packets in an XDP context. Each box in the above
      68             :   diagram represents a phase during the classification. NON-GRE packets follow
      69             :   the path of Ethernet-->IPV4-->UDP. GRE packets follow the path of
      70             :   Ethernet-->IPV4-->GRE-->Inner IPV4-->UDP. Based on the packet's protocol and
      71             :   specific header values, the function decides whether to redirect the packet
      72             :   to the firedancer net tile (LBL_REDIRECT) or pass the packet to the
      73             :   kernel (LBL_PASS).
      74             : */
      75             : ulong
      76             : fd_xdp_gen_program( ulong          code_buf[ 512 ],  /* out: receives the generated eBPF program, one ulong per 8-byte instruction slot */
      77             :                     int            xsks_fd,          /* fd embedded in the lddw that loads r1 before the redirect helper call */
      78             :                     uint           listen_ip4_addr,  /* if non-zero, packets whose ip4 daddr differs go to LBL_PASS; 0 omits the check */
      79             :                     ushort const * ports,            /* UDP dst ports to redirect; each entry is byte swapped before the compare, zero entries are skipped */
      80             :                     ulong          ports_cnt,        /* number of entries in ports; more than 16 is reported via FD_LOG_ERR */
      81           0 :                     int            allowed_gre ) {   /* ==1 emits the jump into the GRE path; any other value omits that jump */
      82             :   /* Returns the number of 8-byte slots written to code_buf (the lddw near the end occupies two slots). */
      83           0 :   #define LBL_PASS      1   // Pass the packet to the kernel
      84           0 :   #define LBL_REDIRECT  2   // Redirect the packet to firedancer software
      85             : 
      86           0 :   #define LBL_GRE_CHECK     3   // Ethernet-->IPV4-->GRE-->Inner IPV4-->UDP
      87           0 :   #define LBL_UDP_CHECK     4   // Ethernet-->IPV4-->UDP
      88             :   /* The LBL_* ids are emitted in place of jump offsets and resolved to real offsets by the fix-up loop at the end of this function. */
      89           0 :   if( FD_UNLIKELY( ports_cnt>16UL ) ) {
      90           0 :     FD_LOG_ERR(( "Too many XDP UDP ports (%lu)", ports_cnt ));
      91           0 :   }
      92             : 
      93           0 :   ulong * code = code_buf;
      94           0 :   *(code++) = FD_EBPF( ldxw, r2, r1, 0                          );  // r2 = xdp_md->data
      95           0 :   *(code++) = FD_EBPF( ldxw, r3, r1, 4                          );  // r3 = xdp_md->data_end
      96             : 
      97           0 :   *(code++) = FD_EBPF( mov64_reg, r5, r2                        );
      98           0 :   *(code++) = FD_EBPF( add64_imm, r5, 34                        );  // Bound check accessing the eth_hdr (14 bytes) and the ip4_hdr (20 bytes)
      99           0 :   *(code++) = FD_EBPF( jgt_reg, r5, r3, LBL_PASS                );  // if r2+34 > r3 goto LBL_PASS
     100             : 
     101           0 :   *(code++) = FD_EBPF( ldxh, r5, r2, 12                         );  // r5 = eth_hdr->net_type (2 bytes at offset 12)
     102           0 :   *(code++) = FD_EBPF( jne_imm, r5, 0x0008, LBL_PASS            );  // if eth_hdr->net_type != IP4 goto LBL_PASS (0x0008 presumably is ethertype 0x0800 as seen by a little-endian load)
     103             : 
     104             :   /* Advance r2 to the start of first ip4_hdr */
     105           0 :   *(code++) = FD_EBPF( add64_imm, r2, 14                        );
     106             : 
     107             :   /* Calculate the start of next hdr and store in r4 */
     108           0 :   *(code++) = FD_EBPF( ldxb, r4, r2, 0                          );  // r4 = ip4_hdr->verihl
     109           0 :   *(code++) = FD_EBPF( and64_imm, r4, 0x0f                      );  // r4 = ip4_hdr->ihl (low 4 bits of ip4_hdr->verihl)
     110           0 :   *(code++) = FD_EBPF( lsh64_imm, r4, 2                         );  // r4 = ip4_hdr->ihl*4 (length of ipv4 header)
     111           0 :   *(code++) = FD_EBPF( add64_reg, r4, r2                        );  // r4 = &ip4_hdr + length of ip4_hdr = start of next hdr
     112             : 
     113             :   /* Check if the next hdr is udp or gre */
     114           0 :   *(code++) = FD_EBPF( ldxb, r5, r2, 9                          );  // r5 = ip4_hdr->protocol
     115             : 
     116           0 :   if( allowed_gre==1 ) {
     117           0 :     *(code++) = FD_EBPF( jeq_imm, r5, 47, LBL_GRE_CHECK );  // if ip4_hdr->protocol == GRE goto gre_check
     118           0 :   }
     119             : 
     120           0 :   *(code++) = FD_EBPF( jeq_imm, r5, 17, LBL_UDP_CHECK           );  // if ip4_hdr->protocol == UDP goto udp_check
     121           0 :   *(code++) = FD_EBPF( ja, LBL_PASS                             );  // goto LBL_PASS
     122             :   /* NOTE(review): the GRE block below is emitted even when allowed_gre!=1, in which case no jump targets it -- confirm the kernel verifier accepts that. */
     123             : 
     124             :   /* next hdr is gre */
     125           0 :   ulong * gre_check = code;
     126             : 
     127             :   /* Advance r2 to start of gre_hdr */
     128           0 :   *(code++) = FD_EBPF( mov64_reg, r2, r4                        );
     129             : 
     130             :   /*
     131             :    * At this point:
     132             :    * r1: xdp_md
     133             :    * r2: start of next header (gre_hdr)
     134             :    * r3: xdp_md->data_end
     135             :    * r4: clobber
     136             :    * r5: clobber
     137             :   */
     138             : 
     139             :   /* Bound check GRE and inner ip4_hdr access */
     140           0 :   *(code++) = FD_EBPF( mov64_reg, r5, r2                        );
     141           0 :   *(code++) = FD_EBPF( add64_imm, r5, 24                        );  // r5 = 1 byte past inner ip4_hdr. sizeof(gre_hdr) + sizeof(ip4_hdr) = 4 + 20
     142           0 :   *(code++) = FD_EBPF( jgt_reg, r5, r3, LBL_PASS                );  // if (end of inner ip4_hdr + 1) > r3, goto LBL_PASS
     143             : 
     144             : 
     145             :   /* Verify GRE fields */
     146           0 :   *(code++) = FD_EBPF( ldxh, r5, r2, 0                          );  // r5 = gre_hdr->flags/version
     147           0 :   *(code++) = FD_EBPF( jne_imm, r5, 0x0000, LBL_PASS            );  // if gre_hdr->flags/version != 0, goto LBL_PASS
     148           0 :   *(code++) = FD_EBPF( ldxh, r5, r2, 2                          );  // r5 = gre_hdr->protocol
     149           0 :   *(code++) = FD_EBPF( jne_imm, r5, 0x0008, LBL_PASS            );  // if gre_hdr->protocol != IP, goto LBL_PASS
     150             : 
     151             : 
     152             :   /* Advance r2 to start of inner ip4_hdr */
     153           0 :   *(code++) = FD_EBPF( add64_imm, r2, 4                         );  // r2 = start of inner ip4_hdr
     154             : 
     155             :   /* Check inner ip4's encapsulated protocol */
     156           0 :   *(code++) = FD_EBPF( ldxb, r5, r2, 9                          );  // r5 = inner ip4_hdr->protocol
     157           0 :   *(code++) = FD_EBPF( jne_imm, r5, 17, LBL_PASS                );  // if r5!=UDP, goto LBL_PASS
     158             : 
     159             :   /* Calculate the start of udp_hdr and store in r4 */
     160           0 :   *(code++) = FD_EBPF( ldxb, r4, r2, 0                          );  // r4 = inner ip4_hdr->verihl
     161           0 :   *(code++) = FD_EBPF( and64_imm, r4, 0x0f                      );  // r4 = inner ip4_hdr->ihl
     162           0 :   *(code++) = FD_EBPF( lsh64_imm, r4, 2                         );  // r4 = inner ip4_hdr->ihl*4 (length of inner ipv4 header)
     163           0 :   *(code++) = FD_EBPF( add64_reg, r4, r2                        );  // r4 = start of udp_hdr
     164             : 
     165             :   /*
     166             :    * At this point:
     167             :    * r1: &xdp_md
     168             :    * r2: start of ip4_hdr  (inner ip4_hdr for gre)
     169             :    * r3: xdp_md->data_end
     170             :    * r4: start of udp_hdr
     171             :    * r5: clobber
     172             :   */
     173             : 
     174             :   /* udp check (the GRE path falls through into this; the plain UDP path jumps here via LBL_UDP_CHECK) */
     175           0 :   ulong * udp_check = code;
     176             : 
     177             :   /* check ip4's dst addr (only emitted when a listen address was given) */
     178           0 :   if( listen_ip4_addr!=0 ) {
     179           0 :     *(code++) = FD_EBPF( ldxw, r5, r2, 16                       );  // r5 = ip4->daddr (4 bytes at offset 16)
     180           0 :     *(code++) = FD_EBPF( jne_imm, r5, listen_ip4_addr, LBL_PASS );  // if ip4->daddr != listen_ip4_addr goto LBL_PASS
     181           0 :   }
     182             :   /* NOTE(review): listen_ip4_addr is passed as a 32-bit immediate; confirm in fd_ebpf_asm.h that the compare behaves as intended for addresses with the top bit set (immediate sign extension). */
     183             :   /* Advance r2 to start of udp_hdr */
     184           0 :   *(code++) = FD_EBPF( mov64_reg, r2, r4                        );
     185             : 
     186             :   /* bound check udp hdr access */
     187           0 :   *(code++) = FD_EBPF( add64_imm, r4, 8                         );  // r4 += sizeof(udp_hdr) = 1 byte past the end of udp_hdr
     188           0 :   *(code++) = FD_EBPF( jgt_reg, r4, r3, LBL_PASS                );  // if (end of udp_hdr + 1) > r3 goto LBL_PASS
     189             : 
     190             :   /* get destination port from udp_hdr */
     191           0 :   *(code++) = FD_EBPF( ldxh, r4, r2, 2                          );  // r4 = udp_hdr->dst_port
     192             : 
     193             :   /* emit one compare per non-zero entry of the ports array; a match with dst_port jumps to LBL_REDIRECT */
     194           0 :   for( ulong i=0UL; i<ports_cnt; i++ ) {
     195           0 :     ushort port = (ushort)fd_ushort_bswap( ports[ i ]           );
     196           0 :     if( !port ) continue;
     197           0 :     *(code++) = FD_EBPF( jeq_imm, r4, port, LBL_REDIRECT         );  // if dst_port == ports[i] goto LBL_REDIRECT
     198           0 :   }
     199             :   /* No port matched: execution falls through into the pass epilogue, which is also the LBL_PASS target */
     200           0 :   ulong * lbl_pass = code;
     201           0 :   *(code++) = FD_EBPF( mov64_imm, r0, XDP_PASS                   );
     202           0 :   *(code++) = FD_EBPF_exit;                                           // return XDP_PASS
     203           0 :   ulong * lbl_redirect = code;
     204           0 :   *(code++) = FD_EBPF( ldxw, r2, r1, 16                          );  // r2 = xdp_md->rx_queue_index
     205           0 :   *(code++) = FD_EBPF( lddw, r1, xsks_fd                         );  // r1 = xsk_map_fd ll
     206           0 :   *(code++) = 0;                                                      // extra zero slot following the lddw (presumably the second half of the 16-byte instruction)
     207           0 :   *(code++) = FD_EBPF( mov64_imm, r3, 0                          );  // r3 = 0
     208           0 :   *(code++) = FD_EBPF( call, 0x33                                );
     209           0 :   *(code++) = FD_EBPF_exit;                                           // return bpf_redirect_map(r1,r2,r3)
     210             : 
     211           0 :   ulong * code_end = code;
     212           0 :   ulong   code_cnt = (ulong)( code_end-code_buf );
     213             : 
     214           0 :   FD_LOG_HEXDUMP_DEBUG(( "XDP program", code_buf, code_cnt*sizeof(ulong) ));  // dumped before the fix-up below, so jump offset fields still hold label ids
     215             : 
     216             :   /* Fill in jump labels: replace the label id in bits 16..31 of each jump with the offset to the label's slot, relative to the following slot */
     217             : 
     218           0 :   for( ulong i=0UL; i<code_cnt; i++ ) {
     219           0 :     if( (code_buf[ i ] & 0x05)==0x05 ) {  // NOTE(review): this mask also matches opcodes whose low three bits are 0x07; such slots seem to rely on a zero offset field hitting case 0 -- confirm
     220           0 :       ulong * jmp_target = 0;
     221           0 :       uint    jmp_label = (code_buf[ i ]>>16) & 0xFFFF;  // 16-bit offset field, currently holding a label id
     222           0 :       switch( jmp_label ) {
     223           0 :       case 0: continue;  // no label in the offset field: leave this slot untouched
     224           0 :       case LBL_PASS:      jmp_target = lbl_pass;     break;
     225           0 :       case LBL_REDIRECT:  jmp_target = lbl_redirect; break;
     226           0 :       case LBL_GRE_CHECK: jmp_target = gre_check;    break;
     227           0 :       case LBL_UDP_CHECK: jmp_target = udp_check;    break;
     228           0 :       default: FD_LOG_ERR(( "Invalid jump instruction (%016lx)", fd_ulong_bswap( code_buf[ i ] ) ));
     229           0 :       }
     230           0 :       long   off   = jmp_target-code_buf-(long)i-1;  // distance in slots from the slot after the jump to the target
     231           0 :       ushort off_u = (ushort)(short)off;
     232           0 :       code_buf[ i ] = (code_buf[ i ] & 0xFFFFFFFF0000FFFF) | ((ulong)off_u<<16UL);  // overwrite only the offset field
     233           0 :     }
     234           0 :   }
     235             : 
     236           0 :   #undef LBL_PASS
     237           0 :   #undef LBL_REDIRECT
     238             : 
     239           0 :   #undef LBL_GRE_CHECK
     240           0 :   #undef LBL_UDP_CHECK
     241           0 :   return code_cnt;
     242           0 : }
     243             : 
     244             : fd_xdp_fds_t
     245             : fd_xdp_install( uint           if_idx,           /* index of the network interface to attach the XDP program to */
     246             :                 uint           listen_ip4_addr,  /* forwarded unchanged to fd_xdp_gen_program */
     247             :                 ulong          ports_cnt,        /* number of entries in ports */
     248             :                 ushort const * ports,            /* UDP ports to redirect; at least one entry must be non-zero */
     249           0 :                 char const *   xdp_mode ) {      /* one of "skb", "drv", "hw", "generic" */
     250             :   /* Creates an XSKMAP, loads the generated XDP program, links it to if_idx and returns the map fd and link fd.  Failures are reported via FD_LOG_ERR. */
     251             :   /* Check args */
     252           0 :   uint uxdp_mode = 0;
     253           0 :   if(      !strcmp( xdp_mode, "skb"     ) ) uxdp_mode = XDP_FLAGS_SKB_MODE;
     254           0 :   else if( !strcmp( xdp_mode, "drv"     ) ) uxdp_mode = XDP_FLAGS_DRV_MODE;
     255           0 :   else if( !strcmp( xdp_mode, "hw"      ) ) uxdp_mode = XDP_FLAGS_HW_MODE;
     256           0 :   else if( !strcmp( xdp_mode, "generic" ) ) uxdp_mode = 0U;
     257           0 :   else FD_LOG_ERR(( "unknown XDP mode `%s`", xdp_mode ));
     258             :   /* Count the non-zero ports; an all-zero list would install a program that never redirects */
     259           0 :   uint true_port_cnt = 0U;
     260           0 :   for( ulong i=0UL; i<ports_cnt; i++ ) true_port_cnt += !!ports[ i ];
     261           0 :   if( FD_UNLIKELY( !true_port_cnt ) ) FD_LOG_ERR(( "XDP program is not listening on any UDP ports" ));
     262             : 
     263             :   /* Create XSK map */
     264             : 
     265           0 :   union bpf_attr attr2 = {
     266           0 :     .map_type    = BPF_MAP_TYPE_XSKMAP,
     267           0 :     .key_size    = 4U,
     268           0 :     .value_size  = 4U,
     269           0 :     .max_entries = 256U,
     270           0 :     .map_name    = "fd_xdp_xsks"
     271           0 :   };
     272           0 :   int xsk_map_fd = (int)bpf( BPF_MAP_CREATE, &attr2, sizeof(union bpf_attr) );
     273           0 :   if( FD_UNLIKELY( -1==xsk_map_fd ) ) FD_LOG_ERR(( "Failed to create XSKMAP (%i-%s)", errno, fd_io_strerror( errno ) ));
     274             : 
     275             :   /* Load eBPF program into kernel (GRE handling is always enabled: allowed_gre is passed as 1) */
     276             : 
     277           0 :   ulong code_buf[ 512 ];
     278           0 :   ulong code_cnt = fd_xdp_gen_program( code_buf, xsk_map_fd, listen_ip4_addr, ports, ports_cnt, 1 );
     279             :   /* Zero the log buffer so it is a valid empty string even if the kernel fails before writing any verifier output */
     280           0 :   char ebpf_kern_log[ 32768UL ] = {0};
     281           0 :   union bpf_attr attr = {
     282           0 :     .prog_type = BPF_PROG_TYPE_XDP,
     283           0 :     .insn_cnt  = (uint)code_cnt,
     284           0 :     .insns     = (ulong)code_buf,
     285           0 :     .license   = (ulong)FD_LICENSE,
     286             :     /* Verifier logs */
     287           0 :     .log_level = 6,
     288           0 :     .log_size  = 32768UL,
     289           0 :     .log_buf   = (ulong)ebpf_kern_log
     290           0 :   };
     291           0 :   int prog_fd = (int)bpf( BPF_PROG_LOAD, &attr, sizeof(union bpf_attr) );
     292           0 :   if( FD_UNLIKELY( -1==prog_fd ) ) {
     293           0 :     FD_LOG_WARNING(( "bpf(BPF_PROG_LOAD) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     294           0 :     FD_LOG_ERR(( "eBPF verifier log:\n%s", ebpf_kern_log ));
     295           0 :   }
     296             : 
     297             :   /* Install program to device */
     298             : 
     299           0 :   struct bpf_link_create link_create = {
     300           0 :     .prog_fd        = (uint)prog_fd,
     301           0 :     .target_ifindex = if_idx,
     302           0 :     .attach_type    = BPF_XDP,
     303           0 :     .flags          = uxdp_mode
     304           0 :   };
     305             : 
     306           0 :   int prog_link_fd = (int)bpf( BPF_LINK_CREATE, fd_type_pun( &link_create ), sizeof(struct bpf_link_create) );
     307           0 :   if( FD_UNLIKELY( -1==prog_link_fd ) ) {
     308           0 :     if( FD_LIKELY( errno==ENOSYS ) ) {
     309           0 :       FD_LOG_ERR(( "BPF_LINK_CREATE is not supported by your kernel (%i-%s). Firedancer requires a Linux "
     310           0 :                    "kernel version of v5.7 or newer to support fast XDP networking.  Please upgrade to a newer "
     311           0 :                    "kernel version.", errno, fd_io_strerror( errno ) ));
     312           0 :     } else if( FD_LIKELY( errno==EINVAL ) ) {
     313           0 :       char if_name[ IF_NAMESIZE ] = {0};
     314           0 :       FD_LOG_ERR(( "BPF_LINK_CREATE failed on interface %s (%i-%s).  This likely means the network device "
     315           0 :                    "does not have support for XDP.  If the device is a bonding device, you will need "
     316           0 :                    "a kernel version of v5.15 or newer.  For other devices, see the list of kernel "
     317           0 :                    "support at https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md#xdp",
     318           0 :                    if_indextoname( if_idx, if_name ), errno, fd_io_strerror( errno ) ));  /* NOTE(review): if_indextoname may return NULL or change errno before the other arguments are evaluated */
     319           0 :     } else {
     320           0 :       FD_LOG_ERR(( "BPF_LINK_CREATE failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     321           0 :     }
     322           0 :   }
     323             :   /* prog_fd is not returned to the caller, so close it here (the link presumably holds its own reference to the program); report the fd that was actually closed */
     324           0 :   if( FD_UNLIKELY( -1==close( prog_fd ) ) ) FD_LOG_ERR(( "close(%d) failed (%i-%s)", prog_fd, errno, fd_io_strerror( errno ) ));
     325             : 
     326           0 :   return (fd_xdp_fds_t){
     327           0 :     .xsk_map_fd   = xsk_map_fd,
     328           0 :     .prog_link_fd = prog_link_fd,
     329           0 :   };
     330           0 : }

Generated by: LCOV version 1.14