Line data Source code
1 : #include "configure.h"
2 :
3 : #include <errno.h>
4 : #include <stdio.h>
5 : #include <unistd.h>
6 : #include <sys/ioctl.h>
7 : #include <sys/stat.h>
8 : #include <linux/if.h>
9 : #include <linux/ethtool.h>
10 : #include <linux/sockios.h>
11 :
12 0 : #define NAME "ethtool-channels"
13 :
14 : static int
15 0 : enabled( config_t const * config ) {
16 :
17 : /* if we're running in a network namespace, we configure ethtool on
18 : the virtual device as part of netns setup, not here */
19 0 : if( config->development.netns.enabled ) return 0;
20 :
21 : /* only enable if network stack is XDP */
22 0 : if( 0!=strcmp( config->development.net.provider, "xdp" ) ) return 0;
23 :
24 0 : return 1;
25 0 : }
26 :
27 : static void
28 : init_perm( fd_cap_chk_t * chk,
29 0 : config_t const * config FD_PARAM_UNUSED ) {
30 0 : fd_cap_chk_root( chk, NAME, "increase network device channels with `ethtool --set-channels`" );
31 0 : }
32 :
33 : static int
34 0 : device_is_bonded( const char * device ) {
35 0 : char path[ PATH_MAX ];
36 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
37 0 : struct stat st;
38 0 : int err = stat( path, &st );
39 0 : if( FD_UNLIKELY( err && errno != ENOENT ) )
40 0 : FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
41 0 : device, path, errno, fd_io_strerror( errno ) ));
42 0 : return !err;
43 0 : }
44 :
45 : static void
46 : device_read_slaves( const char * device,
47 0 : char output[ 4096 ] ) {
48 0 : char path[ PATH_MAX ];
49 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
50 :
51 0 : FILE * fp = fopen( path, "r" );
52 0 : if( FD_UNLIKELY( !fp ) )
53 0 : FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
54 0 : if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
55 0 : FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
56 0 : if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
57 0 : if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
58 0 : if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
59 0 : if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
60 0 : if( FD_UNLIKELY( fclose( fp ) ) )
61 0 : FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
62 0 : output[ strlen( output ) - 1 ] = '\0';
63 0 : }
64 :
65 : static void
66 : init_device( const char * device,
67 0 : uint combined_channel_count ) {
68 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
69 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
70 :
71 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
72 0 : if( FD_UNLIKELY( sock < 0 ) )
73 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
74 0 : errno, fd_io_strerror( errno ) ));
75 :
76 0 : struct ethtool_channels channels = {0};
77 0 : channels.cmd = ETHTOOL_GCHANNELS;
78 :
79 0 : struct ifreq ifr = {0};
80 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE-1 );
81 0 : ifr.ifr_data = (void *)&channels;
82 :
83 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) )
84 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
85 0 : errno, fd_io_strerror( errno ) ));
86 :
87 0 : channels.cmd = ETHTOOL_SCHANNELS;
88 0 : if( channels.max_combined ) {
89 0 : channels.combined_count = combined_channel_count;
90 0 : channels.rx_count = 0;
91 0 : channels.tx_count = 0;
92 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s combined %u`", device, combined_channel_count ));
93 0 : } else {
94 0 : channels.combined_count = 0;
95 0 : channels.rx_count = combined_channel_count;
96 0 : channels.tx_count = combined_channel_count;
97 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s rx %u tx %u`", device, combined_channel_count, combined_channel_count ));
98 0 : }
99 :
100 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
101 0 : if( FD_LIKELY( errno == EBUSY ) )
102 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s). "
103 0 : "This is most commonly caused by an issue with the Intel ice driver on certain versions "
104 0 : "of Ubuntu. If you are using the ice driver, `sudo dmesg | grep %s` contains "
105 0 : "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
106 0 : "blacklisting the irdma kernel module.",
107 0 : errno, fd_io_strerror( errno ), device ));
108 0 : else
109 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s)",
110 0 : errno, fd_io_strerror( errno ) ));
111 0 : }
112 :
113 :
114 0 : if( FD_UNLIKELY( close( sock ) ) )
115 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
116 0 : }
117 :
118 : static void
119 0 : init( config_t const * config ) {
120 : /* we need one channel for both TX and RX on the NIC for each QUIC
121 : tile, but the interface probably defaults to one channel total */
122 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
123 : /* if using a bonded device, we need to set channels on the
124 : underlying devices. */
125 0 : char line[ 4096 ];
126 0 : device_read_slaves( config->tiles.net.interface, line );
127 0 : char * saveptr;
128 0 : for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
129 0 : init_device( token, config->layout.net_tile_count );
130 0 : }
131 0 : } else {
132 0 : init_device( config->tiles.net.interface, config->layout.net_tile_count );
133 0 : }
134 0 : }
135 :
136 : static configure_result_t
137 : check_device( const char * device,
138 0 : uint expected_channel_count ) {
139 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
140 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
141 :
142 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
143 0 : if( FD_UNLIKELY( sock < 0 ) )
144 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
145 0 : errno, fd_io_strerror( errno ) ));
146 :
147 0 : struct ethtool_channels channels = {0};
148 0 : channels.cmd = ETHTOOL_GCHANNELS;
149 :
150 0 : struct ifreq ifr = {0};
151 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE );
152 0 : ifr.ifr_name[ IF_NAMESIZE - 1 ] = '\0'; // silence linter, not needed for correctness
153 0 : ifr.ifr_data = (void *)&channels;
154 :
155 0 : int supports_channels = 1;
156 0 : uint current_channels = 0;
157 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
158 0 : if( FD_LIKELY( errno == EOPNOTSUPP ) ) {
159 : /* network device doesn't support setting number of channels, so
160 : it must always be 1 */
161 0 : supports_channels = 0;
162 0 : current_channels = 1;
163 0 : } else {
164 0 : FD_LOG_ERR(( "error configuring network device `%s`, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
165 0 : device, errno, fd_io_strerror( errno ) ));
166 0 : }
167 0 : }
168 :
169 0 : if( FD_UNLIKELY( close( sock ) ) )
170 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
171 :
172 0 : if( channels.combined_count ) {
173 0 : current_channels = channels.combined_count;
174 0 : } else if( channels.rx_count || channels.tx_count ) {
175 0 : if( FD_UNLIKELY( channels.rx_count != channels.tx_count ) ) {
176 0 : NOT_CONFIGURED( "device `%s` has unbalanced channel count: (got %u rx, %u tx, expected %u)",
177 0 : device, channels.rx_count, channels.tx_count, expected_channel_count );
178 0 : }
179 0 : current_channels = channels.rx_count;
180 0 : }
181 :
182 0 : if( FD_UNLIKELY( current_channels != expected_channel_count ) ) {
183 0 : if( FD_UNLIKELY( !supports_channels ) ) {
184 0 : FD_LOG_ERR(( "Network device `%s` does not support setting number of channels, "
185 0 : "but you are running with more than one net tile (expected {%u}), "
186 0 : "and there must be one channel per tile. You can either use a NIC "
187 0 : "that supports multiple channels, or run Firedancer with only one "
188 0 : "net tile. You can configure Firedancer to run with only one QUIC "
189 0 : "tile by setting `layout.net_tile_count` to 1 in your "
190 0 : "configuration file. It is not recommended to do this in production "
191 0 : "as it will limit network performance.",
192 0 : device, expected_channel_count ));
193 0 : } else {
194 0 : NOT_CONFIGURED( "device `%s` does not have right number of channels (got %u but "
195 0 : "expected %u)",
196 0 : device, current_channels, expected_channel_count );
197 0 : }
198 0 : }
199 :
200 0 : CONFIGURE_OK();
201 0 : }
202 :
203 : static configure_result_t
204 0 : check( config_t const * config ) {
205 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
206 0 : char line[ 4096 ];
207 0 : device_read_slaves( config->tiles.net.interface, line );
208 0 : char * saveptr;
209 0 : for( char * token=strtok_r( line, " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
210 0 : CHECK( check_device( token, config->layout.net_tile_count ) );
211 0 : }
212 0 : } else {
213 0 : CHECK( check_device( config->tiles.net.interface, config->layout.net_tile_count ) );
214 0 : }
215 :
216 0 : CONFIGURE_OK();
217 0 : }
218 :
219 : configure_stage_t fd_cfg_stage_ethtool_channels = {
220 : .name = NAME,
221 : .always_recreate = 0,
222 : .enabled = enabled,
223 : .init_perm = init_perm,
224 : .fini_perm = NULL,
225 : .init = init,
226 : .fini = NULL,
227 : .check = check,
228 : };
229 :
230 : #undef NAME
|