Line data Source code
1 : #include "configure.h"
2 :
3 : #include <stdio.h>
4 : #include <ctype.h>
5 : #include <unistd.h>
6 : #include <sys/ioctl.h>
7 : #include <sys/stat.h>
8 : #include <linux/if.h>
9 : #include <linux/ethtool.h>
10 : #include <linux/sockios.h>
11 :
12 0 : #define NAME "ethtool-channels"
13 :
14 : static int
15 0 : enabled( config_t * const config ) {
16 : /* if we're running in a network namespace, we configure ethtool on
17 : the virtual device as part of netns setup, not here */
18 0 : return !config->development.netns.enabled;
19 0 : }
20 :
21 : static void
22 : init_perm( fd_caps_ctx_t * caps,
23 0 : config_t * const config ) {
24 0 : (void)config;
25 0 : fd_caps_check_root( caps, NAME, "increase network device channels with `ethtool --set-channels`" );
26 0 : }
27 :
28 : static int
29 0 : device_is_bonded( const char * device ) {
30 0 : char path[ PATH_MAX ];
31 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
32 0 : struct stat st;
33 0 : int err = stat( path, &st );
34 0 : if( FD_UNLIKELY( err && errno != ENOENT ) )
35 0 : FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
36 0 : device, path, errno, fd_io_strerror( errno ) ));
37 0 : return !err;
38 0 : }
39 :
40 : static void
41 : device_read_slaves( const char * device,
42 0 : char output[ 4096 ] ) {
43 0 : char path[ PATH_MAX ];
44 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
45 :
46 0 : FILE * fp = fopen( path, "r" );
47 0 : if( FD_UNLIKELY( !fp ) )
48 0 : FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
49 0 : if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
50 0 : FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
51 0 : if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
52 0 : if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
53 0 : if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
54 0 : if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
55 0 : if( FD_UNLIKELY( fclose( fp ) ) )
56 0 : FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
57 0 : output[ strlen( output ) - 1 ] = '\0';
58 0 : }
59 :
60 : static void
61 : init_device( const char * device,
62 0 : uint combined_channel_count ) {
63 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
64 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
65 :
66 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
67 0 : if( FD_UNLIKELY( sock < 0 ) )
68 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
69 0 : errno, fd_io_strerror( errno ) ));
70 :
71 0 : struct ethtool_channels channels = {0};
72 0 : channels.cmd = ETHTOOL_GCHANNELS;
73 :
74 0 : struct ifreq ifr = {0};
75 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE-1 );
76 0 : ifr.ifr_data = (void *)&channels;
77 :
78 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) )
79 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
80 0 : errno, fd_io_strerror( errno ) ));
81 :
82 0 : channels.cmd = ETHTOOL_SCHANNELS;
83 0 : if( channels.max_combined ) {
84 0 : channels.combined_count = combined_channel_count;
85 0 : channels.rx_count = 0;
86 0 : channels.tx_count = 0;
87 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s combined %u`", device, combined_channel_count ));
88 0 : } else {
89 0 : channels.combined_count = 0;
90 0 : channels.rx_count = combined_channel_count;
91 0 : channels.tx_count = combined_channel_count;
92 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s rx %u tx %u`", device, combined_channel_count, combined_channel_count ));
93 0 : }
94 :
95 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
96 0 : if( FD_LIKELY( errno == EBUSY ) )
97 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s). "
98 0 : "This is most commonly caused by an issue with the Intel ice driver on certain versions "
99 0 : "of Ubuntu. If you are using the ice driver, `sudo dmesg | grep %s` contains "
100 0 : "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
101 0 : "blacklisting the irdma kernel module.",
102 0 : errno, fd_io_strerror( errno ), device ));
103 0 : else
104 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s)",
105 0 : errno, fd_io_strerror( errno ) ));
106 0 : }
107 :
108 :
109 0 : if( FD_UNLIKELY( close( sock ) ) )
110 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
111 0 : }
112 :
113 : static void
114 0 : init( config_t * const config ) {
115 : /* we need one channel for both TX and RX on the NIC for each QUIC
116 : tile, but the interface probably defaults to one channel total */
117 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
118 : /* if using a bonded device, we need to set channels on the
119 : underlying devices. */
120 0 : char line[ 4096 ];
121 0 : device_read_slaves( config->tiles.net.interface, line );
122 0 : char * saveptr;
123 0 : for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
124 0 : init_device( token, config->layout.net_tile_count );
125 0 : }
126 0 : } else {
127 0 : init_device( config->tiles.net.interface, config->layout.net_tile_count );
128 0 : }
129 0 : }
130 :
131 : static configure_result_t
132 : check_device( const char * device,
133 0 : uint expected_channel_count ) {
134 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
135 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
136 :
137 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
138 0 : if( FD_UNLIKELY( sock < 0 ) )
139 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
140 0 : errno, fd_io_strerror( errno ) ));
141 :
142 0 : struct ethtool_channels channels = {0};
143 0 : channels.cmd = ETHTOOL_GCHANNELS;
144 :
145 0 : struct ifreq ifr = {0};
146 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE );
147 0 : ifr.ifr_name[ IF_NAMESIZE - 1 ] = '\0'; // silence linter, not needed for correctness
148 0 : ifr.ifr_data = (void *)&channels;
149 :
150 0 : int supports_channels = 1;
151 0 : uint current_channels = 0;
152 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
153 0 : if( FD_LIKELY( errno == EOPNOTSUPP ) ) {
154 : /* network device doesn't support setting number of channels, so
155 : it must always be 1 */
156 0 : supports_channels = 0;
157 0 : current_channels = 1;
158 0 : } else {
159 0 : FD_LOG_ERR(( "error configuring network device `%s`, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
160 0 : device, errno, fd_io_strerror( errno ) ));
161 0 : }
162 0 : }
163 :
164 0 : if( FD_UNLIKELY( close( sock ) ) )
165 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
166 :
167 0 : if( channels.combined_count ) {
168 0 : current_channels = channels.combined_count;
169 0 : } else if( channels.rx_count || channels.tx_count ) {
170 0 : if( FD_UNLIKELY( channels.rx_count != channels.tx_count ) ) {
171 0 : NOT_CONFIGURED( "device `%s` has unbalanced channel count: (got %u rx, %u tx, expected %u)",
172 0 : device, channels.rx_count, channels.tx_count, expected_channel_count );
173 0 : }
174 0 : current_channels = channels.rx_count;
175 0 : }
176 :
177 0 : if( FD_UNLIKELY( current_channels != expected_channel_count ) ) {
178 0 : if( FD_UNLIKELY( !supports_channels ) ) {
179 0 : FD_LOG_ERR(( "Network device `%s` does not support setting number of channels, "
180 0 : "but you are running with more than one net tile (expected {%u}), "
181 0 : "and there must be one channel per tile. You can either use a NIC "
182 0 : "that supports multiple channels, or run Firedancer with only one "
183 0 : "net tile. You can configure Firedancer to run with only one QUIC "
184 0 : "tile by setting `layout.net_tile_count` to 1 in your "
185 0 : "configuration file. It is not recommended to do this in production "
186 0 : "as it will limit network performance.",
187 0 : device, expected_channel_count ));
188 0 : } else {
189 0 : NOT_CONFIGURED( "device `%s` does not have right number of channels (got %u but "
190 0 : "expected %u)",
191 0 : device, current_channels, expected_channel_count );
192 0 : }
193 0 : }
194 :
195 0 : CONFIGURE_OK();
196 0 : }
197 :
198 : static configure_result_t
199 0 : check( config_t * const config ) {
200 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
201 0 : char line[ 4096 ];
202 0 : device_read_slaves( config->tiles.net.interface, line );
203 0 : char * saveptr;
204 0 : for( char * token=strtok_r( line, " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
205 0 : CHECK( check_device( token, config->layout.net_tile_count ) );
206 0 : }
207 0 : } else {
208 0 : CHECK( check_device( config->tiles.net.interface, config->layout.net_tile_count ) );
209 0 : }
210 :
211 0 : CONFIGURE_OK();
212 0 : }
213 :
214 : configure_stage_t ethtool_channels = {
215 : .name = NAME,
216 : .always_recreate = 0,
217 : .enabled = enabled,
218 : .init_perm = init_perm,
219 : .fini_perm = NULL,
220 : .init = init,
221 : .fini = NULL,
222 : .check = check,
223 : };
224 :
225 : #undef NAME
|