Line data Source code
1 : #include "configure.h"
2 :
3 : #include <stdio.h>
4 : #include <unistd.h>
5 : #include <sys/ioctl.h>
6 : #include <sys/stat.h>
7 : #include <linux/if.h>
8 : #include <linux/ethtool.h>
9 : #include <linux/sockios.h>
10 :
11 0 : #define NAME "ethtool-channels"
12 :
13 : static int
14 0 : enabled( config_t const * config ) {
15 : /* if we're running in a network namespace, we configure ethtool on
16 : the virtual device as part of netns setup, not here */
17 0 : return !config->development.netns.enabled;
18 0 : }
19 :
20 : static void
21 : init_perm( fd_caps_ctx_t * caps,
22 0 : config_t const * config FD_PARAM_UNUSED ) {
23 0 : fd_caps_check_root( caps, NAME, "increase network device channels with `ethtool --set-channels`" );
24 0 : }
25 :
26 : static int
27 0 : device_is_bonded( const char * device ) {
28 0 : char path[ PATH_MAX ];
29 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
30 0 : struct stat st;
31 0 : int err = stat( path, &st );
32 0 : if( FD_UNLIKELY( err && errno != ENOENT ) )
33 0 : FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
34 0 : device, path, errno, fd_io_strerror( errno ) ));
35 0 : return !err;
36 0 : }
37 :
38 : static void
39 : device_read_slaves( const char * device,
40 0 : char output[ 4096 ] ) {
41 0 : char path[ PATH_MAX ];
42 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
43 :
44 0 : FILE * fp = fopen( path, "r" );
45 0 : if( FD_UNLIKELY( !fp ) )
46 0 : FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
47 0 : if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
48 0 : FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
49 0 : if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
50 0 : if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
51 0 : if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
52 0 : if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
53 0 : if( FD_UNLIKELY( fclose( fp ) ) )
54 0 : FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
55 0 : output[ strlen( output ) - 1 ] = '\0';
56 0 : }
57 :
58 : static void
59 : init_device( const char * device,
60 0 : uint combined_channel_count ) {
61 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
62 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
63 :
64 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
65 0 : if( FD_UNLIKELY( sock < 0 ) )
66 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
67 0 : errno, fd_io_strerror( errno ) ));
68 :
69 0 : struct ethtool_channels channels = {0};
70 0 : channels.cmd = ETHTOOL_GCHANNELS;
71 :
72 0 : struct ifreq ifr = {0};
73 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE-1 );
74 0 : ifr.ifr_data = (void *)&channels;
75 :
76 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) )
77 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
78 0 : errno, fd_io_strerror( errno ) ));
79 :
80 0 : channels.cmd = ETHTOOL_SCHANNELS;
81 0 : if( channels.max_combined ) {
82 0 : channels.combined_count = combined_channel_count;
83 0 : channels.rx_count = 0;
84 0 : channels.tx_count = 0;
85 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s combined %u`", device, combined_channel_count ));
86 0 : } else {
87 0 : channels.combined_count = 0;
88 0 : channels.rx_count = combined_channel_count;
89 0 : channels.tx_count = combined_channel_count;
90 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s rx %u tx %u`", device, combined_channel_count, combined_channel_count ));
91 0 : }
92 :
93 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
94 0 : if( FD_LIKELY( errno == EBUSY ) )
95 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s). "
96 0 : "This is most commonly caused by an issue with the Intel ice driver on certain versions "
97 0 : "of Ubuntu. If you are using the ice driver, `sudo dmesg | grep %s` contains "
98 0 : "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
99 0 : "blacklisting the irdma kernel module.",
100 0 : errno, fd_io_strerror( errno ), device ));
101 0 : else
102 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s)",
103 0 : errno, fd_io_strerror( errno ) ));
104 0 : }
105 :
106 :
107 0 : if( FD_UNLIKELY( close( sock ) ) )
108 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
109 0 : }
110 :
111 : static void
112 0 : init( config_t * const config ) {
113 : /* we need one channel for both TX and RX on the NIC for each QUIC
114 : tile, but the interface probably defaults to one channel total */
115 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
116 : /* if using a bonded device, we need to set channels on the
117 : underlying devices. */
118 0 : char line[ 4096 ];
119 0 : device_read_slaves( config->tiles.net.interface, line );
120 0 : char * saveptr;
121 0 : for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
122 0 : init_device( token, config->layout.net_tile_count );
123 0 : }
124 0 : } else {
125 0 : init_device( config->tiles.net.interface, config->layout.net_tile_count );
126 0 : }
127 0 : }
128 :
129 : static configure_result_t
130 : check_device( const char * device,
131 0 : uint expected_channel_count ) {
132 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
133 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
134 :
135 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
136 0 : if( FD_UNLIKELY( sock < 0 ) )
137 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
138 0 : errno, fd_io_strerror( errno ) ));
139 :
140 0 : struct ethtool_channels channels = {0};
141 0 : channels.cmd = ETHTOOL_GCHANNELS;
142 :
143 0 : struct ifreq ifr = {0};
144 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE );
145 0 : ifr.ifr_name[ IF_NAMESIZE - 1 ] = '\0'; // silence linter, not needed for correctness
146 0 : ifr.ifr_data = (void *)&channels;
147 :
148 0 : int supports_channels = 1;
149 0 : uint current_channels = 0;
150 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
151 0 : if( FD_LIKELY( errno == EOPNOTSUPP ) ) {
152 : /* network device doesn't support setting number of channels, so
153 : it must always be 1 */
154 0 : supports_channels = 0;
155 0 : current_channels = 1;
156 0 : } else {
157 0 : FD_LOG_ERR(( "error configuring network device `%s`, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
158 0 : device, errno, fd_io_strerror( errno ) ));
159 0 : }
160 0 : }
161 :
162 0 : if( FD_UNLIKELY( close( sock ) ) )
163 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
164 :
165 0 : if( channels.combined_count ) {
166 0 : current_channels = channels.combined_count;
167 0 : } else if( channels.rx_count || channels.tx_count ) {
168 0 : if( FD_UNLIKELY( channels.rx_count != channels.tx_count ) ) {
169 0 : NOT_CONFIGURED( "device `%s` has unbalanced channel count: (got %u rx, %u tx, expected %u)",
170 0 : device, channels.rx_count, channels.tx_count, expected_channel_count );
171 0 : }
172 0 : current_channels = channels.rx_count;
173 0 : }
174 :
175 0 : if( FD_UNLIKELY( current_channels != expected_channel_count ) ) {
176 0 : if( FD_UNLIKELY( !supports_channels ) ) {
177 0 : FD_LOG_ERR(( "Network device `%s` does not support setting number of channels, "
178 0 : "but you are running with more than one net tile (expected {%u}), "
179 0 : "and there must be one channel per tile. You can either use a NIC "
180 0 : "that supports multiple channels, or run Firedancer with only one "
181 0 : "net tile. You can configure Firedancer to run with only one QUIC "
182 0 : "tile by setting `layout.net_tile_count` to 1 in your "
183 0 : "configuration file. It is not recommended to do this in production "
184 0 : "as it will limit network performance.",
185 0 : device, expected_channel_count ));
186 0 : } else {
187 0 : NOT_CONFIGURED( "device `%s` does not have right number of channels (got %u but "
188 0 : "expected %u)",
189 0 : device, current_channels, expected_channel_count );
190 0 : }
191 0 : }
192 :
193 0 : CONFIGURE_OK();
194 0 : }
195 :
196 : static configure_result_t
197 0 : check( config_t const * config ) {
198 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
199 0 : char line[ 4096 ];
200 0 : device_read_slaves( config->tiles.net.interface, line );
201 0 : char * saveptr;
202 0 : for( char * token=strtok_r( line, " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
203 0 : CHECK( check_device( token, config->layout.net_tile_count ) );
204 0 : }
205 0 : } else {
206 0 : CHECK( check_device( config->tiles.net.interface, config->layout.net_tile_count ) );
207 0 : }
208 :
209 0 : CONFIGURE_OK();
210 0 : }
211 :
212 : configure_stage_t fd_cfg_stage_ethtool_channels = {
213 : .name = NAME,
214 : .always_recreate = 0,
215 : .enabled = enabled,
216 : .init_perm = init_perm,
217 : .fini_perm = NULL,
218 : .init = init,
219 : .fini = NULL,
220 : .check = check,
221 : };
222 :
223 : #undef NAME
|