LCOV - code coverage report
Current view: top level - app/shared/commands/configure - ethtool-channels.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 156 0.0 %
Date: 2025-03-20 12:08:36 Functions: 0 8 0.0 %

          Line data    Source code
       1             : #include "configure.h"
       2             : 
       3             : #include <errno.h>
       4             : #include <stdio.h>
       5             : #include <unistd.h>
       6             : #include <sys/ioctl.h>
       7             : #include <sys/stat.h>
       8             : #include <linux/if.h>
       9             : #include <linux/ethtool.h>
      10             : #include <linux/sockios.h>
      11             : 
      12           0 : #define NAME "ethtool-channels"
      13             : 
      14             : static int
      15           0 : enabled( config_t const * config ) {
      16             : 
      17             :   /* if we're running in a network namespace, we configure ethtool on
      18             :      the virtual device as part of netns setup, not here */
      19           0 :   if( config->development.netns.enabled ) return 0;
      20             : 
      21             :   /* only enable if network stack is XDP */
      22           0 :   if( 0!=strcmp( config->development.net.provider, "xdp" ) ) return 0;
      23             : 
      24           0 :   return 1;
      25           0 : }
      26             : 
      27             : static void
      28             : init_perm( fd_cap_chk_t *   chk,
      29           0 :            config_t const * config FD_PARAM_UNUSED ) {
      30           0 :   fd_cap_chk_root( chk, NAME, "increase network device channels with `ethtool --set-channels`" );
      31           0 : }
      32             : 
      33             : static int
      34           0 : device_is_bonded( const char * device ) {
      35           0 :   char path[ PATH_MAX ];
      36           0 :   FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
      37           0 :   struct stat st;
      38           0 :   int err = stat( path, &st );
      39           0 :   if( FD_UNLIKELY( err && errno != ENOENT ) )
      40           0 :     FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
      41           0 :                  device, path, errno, fd_io_strerror( errno ) ));
      42           0 :   return !err;
      43           0 : }
      44             : 
      45             : static void
      46             : device_read_slaves( const char * device,
      47           0 :                     char         output[ 4096 ] ) {
      48           0 :   char path[ PATH_MAX ];
      49           0 :   FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
      50             : 
      51           0 :   FILE * fp = fopen( path, "r" );
      52           0 :   if( FD_UNLIKELY( !fp ) )
      53           0 :     FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
      54           0 :   if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
      55           0 :     FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
      56           0 :   if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
      57           0 :   if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
      58           0 :   if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
      59           0 :   if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
      60           0 :   if( FD_UNLIKELY( fclose( fp ) ) )
      61           0 :     FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
      62           0 :   output[ strlen( output ) - 1 ] = '\0';
      63           0 : }
      64             : 
      65             : static void
      66             : init_device( const char * device,
      67           0 :              uint         combined_channel_count ) {
      68           0 :   if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
      69           0 :   if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
      70             : 
      71           0 :   int sock = socket( AF_INET, SOCK_DGRAM, 0 );
      72           0 :   if( FD_UNLIKELY( sock < 0 ) )
      73           0 :     FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
      74           0 :                  errno, fd_io_strerror( errno ) ));
      75             : 
      76           0 :   struct ethtool_channels channels = {0};
      77           0 :   channels.cmd = ETHTOOL_GCHANNELS;
      78             : 
      79           0 :   struct ifreq ifr = {0};
      80           0 :   strncpy( ifr.ifr_name, device, IF_NAMESIZE-1 );
      81           0 :   ifr.ifr_data = (void *)&channels;
      82             : 
      83           0 :   if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) )
      84           0 :     FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
      85           0 :                  errno, fd_io_strerror( errno ) ));
      86             : 
      87           0 :   channels.cmd = ETHTOOL_SCHANNELS;
      88           0 :   if( channels.max_combined ) {
      89           0 :     channels.combined_count = combined_channel_count;
      90           0 :     channels.rx_count       = 0;
      91           0 :     channels.tx_count       = 0;
      92           0 :     FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s combined %u`", device, combined_channel_count ));
      93           0 :   } else {
      94           0 :     channels.combined_count = 0;
      95           0 :     channels.rx_count       = combined_channel_count;
      96           0 :     channels.tx_count       = combined_channel_count;
      97           0 :     FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s rx %u tx %u`", device, combined_channel_count, combined_channel_count ));
      98           0 :   }
      99             : 
     100           0 :   if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
     101           0 :     if( FD_LIKELY( errno == EBUSY ) )
     102           0 :       FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s). "
     103           0 :                    "This is most commonly caused by an issue with the Intel ice driver on certain versions "
     104           0 :                    "of Ubuntu.  If you are using the ice driver, `sudo dmesg | grep %s` contains "
     105           0 :                    "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
     106           0 :                    "blacklisting the irdma kernel module.",
     107           0 :                    errno, fd_io_strerror( errno ), device ));
     108           0 :     else
     109           0 :       FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s)",
     110           0 :                    errno, fd_io_strerror( errno ) ));
     111           0 :   }
     112             : 
     113             : 
     114           0 :   if( FD_UNLIKELY( close( sock ) ) )
     115           0 :     FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     116           0 : }
     117             : 
     118             : static void
     119           0 : init( config_t const * config ) {
     120             :   /* we need one channel for both TX and RX on the NIC for each QUIC
     121             :      tile, but the interface probably defaults to one channel total */
     122           0 :   if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
     123             :     /* if using a bonded device, we need to set channels on the
     124             :        underlying devices. */
     125           0 :     char line[ 4096 ];
     126           0 :     device_read_slaves( config->tiles.net.interface, line );
     127           0 :     char * saveptr;
     128           0 :     for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
     129           0 :       init_device( token, config->layout.net_tile_count );
     130           0 :     }
     131           0 :   } else {
     132           0 :     init_device( config->tiles.net.interface, config->layout.net_tile_count );
     133           0 :   }
     134           0 : }
     135             : 
     136             : static configure_result_t
     137             : check_device( const char * device,
     138           0 :               uint         expected_channel_count ) {
     139           0 :   if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
     140           0 :   if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
     141             : 
     142           0 :   int sock = socket( AF_INET, SOCK_DGRAM, 0 );
     143           0 :   if( FD_UNLIKELY( sock < 0 ) )
     144           0 :     FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
     145           0 :                  errno, fd_io_strerror( errno ) ));
     146             : 
     147           0 :   struct ethtool_channels channels = {0};
     148           0 :   channels.cmd = ETHTOOL_GCHANNELS;
     149             : 
     150           0 :   struct ifreq ifr = {0};
     151           0 :   strncpy( ifr.ifr_name, device, IF_NAMESIZE );
     152           0 :   ifr.ifr_name[ IF_NAMESIZE - 1 ] = '\0'; // silence linter, not needed for correctness
     153           0 :   ifr.ifr_data = (void *)&channels;
     154             : 
     155           0 :   int  supports_channels = 1;
     156           0 :   uint current_channels  = 0;
     157           0 :   if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
     158           0 :     if( FD_LIKELY( errno == EOPNOTSUPP ) ) {
     159             :       /* network device doesn't support setting number of channels, so
     160             :          it must always be 1 */
     161           0 :       supports_channels = 0;
     162           0 :       current_channels  = 1;
     163           0 :     } else {
     164           0 :       FD_LOG_ERR(( "error configuring network device `%s`, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
     165           0 :                    device, errno, fd_io_strerror( errno ) ));
     166           0 :     }
     167           0 :   }
     168             : 
     169           0 :   if( FD_UNLIKELY( close( sock ) ) )
     170           0 :     FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     171             : 
     172           0 :   if( channels.combined_count ) {
     173           0 :     current_channels = channels.combined_count;
     174           0 :   } else if( channels.rx_count || channels.tx_count ) {
     175           0 :     if( FD_UNLIKELY( channels.rx_count != channels.tx_count ) ) {
     176           0 :       NOT_CONFIGURED( "device `%s` has unbalanced channel count: (got %u rx, %u tx, expected %u)",
     177           0 :                       device, channels.rx_count, channels.tx_count, expected_channel_count );
     178           0 :     }
     179           0 :     current_channels = channels.rx_count;
     180           0 :   }
     181             : 
     182           0 :   if( FD_UNLIKELY( current_channels != expected_channel_count ) ) {
     183           0 :     if( FD_UNLIKELY( !supports_channels ) ) {
     184           0 :       FD_LOG_ERR(( "Network device `%s` does not support setting number of channels, "
     185           0 :                    "but you are running with more than one net tile (expected {%u}), "
     186           0 :                    "and there must be one channel per tile. You can either use a NIC "
     187           0 :                    "that supports multiple channels, or run Firedancer with only one "
     188           0 :                    "net tile. You can configure Firedancer to run with only one QUIC "
     189           0 :                    "tile by setting `layout.net_tile_count` to 1 in your "
     190           0 :                    "configuration file. It is not recommended to do this in production "
     191           0 :                    "as it will limit network performance.",
     192           0 :                    device, expected_channel_count ));
     193           0 :     } else {
     194           0 :       NOT_CONFIGURED( "device `%s` does not have right number of channels (got %u but "
     195           0 :                       "expected %u)",
     196           0 :                       device, current_channels, expected_channel_count );
     197           0 :     }
     198           0 :   }
     199             : 
     200           0 :   CONFIGURE_OK();
     201           0 : }
     202             : 
     203             : static configure_result_t
     204           0 : check( config_t const * config ) {
     205           0 :   if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
     206           0 :     char line[ 4096 ];
     207           0 :     device_read_slaves( config->tiles.net.interface, line );
     208           0 :     char * saveptr;
     209           0 :     for( char * token=strtok_r( line, " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
     210           0 :       CHECK( check_device( token, config->layout.net_tile_count ) );
     211           0 :     }
     212           0 :   } else {
     213           0 :     CHECK( check_device( config->tiles.net.interface, config->layout.net_tile_count ) );
     214           0 :   }
     215             : 
     216           0 :   CONFIGURE_OK();
     217           0 : }
     218             : 
     219             : configure_stage_t fd_cfg_stage_ethtool_channels = {
     220             :   .name            = NAME,
     221             :   .always_recreate = 0,
     222             :   .enabled         = enabled,
     223             :   .init_perm       = init_perm,
     224             :   .fini_perm       = NULL,
     225             :   .init            = init,
     226             :   .fini            = NULL,
     227             :   .check           = check,
     228             : };
     229             : 
     230             : #undef NAME

Generated by: LCOV version 1.14