Line data Source code
1 : #include "configure.h"
2 :
3 : #include <stdio.h>
4 : #include <unistd.h>
5 : #include <sys/ioctl.h>
6 : #include <sys/stat.h>
7 : #include <linux/if.h>
8 : #include <linux/ethtool.h>
9 : #include <linux/sockios.h>
10 :
11 0 : #define NAME "ethtool-channels"
12 :
13 : static int
14 0 : enabled( config_t * const config ) {
15 : /* if we're running in a network namespace, we configure ethtool on
16 : the virtual device as part of netns setup, not here */
17 0 : return !config->development.netns.enabled;
18 0 : }
19 :
20 : static void
21 : init_perm( fd_caps_ctx_t * caps,
22 0 : config_t * const config ) {
23 0 : (void)config;
24 0 : fd_caps_check_root( caps, NAME, "increase network device channels with `ethtool --set-channels`" );
25 0 : }
26 :
27 : static int
28 0 : device_is_bonded( const char * device ) {
29 0 : char path[ PATH_MAX ];
30 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
31 0 : struct stat st;
32 0 : int err = stat( path, &st );
33 0 : if( FD_UNLIKELY( err && errno != ENOENT ) )
34 0 : FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
35 0 : device, path, errno, fd_io_strerror( errno ) ));
36 0 : return !err;
37 0 : }
38 :
39 : static void
40 : device_read_slaves( const char * device,
41 0 : char output[ 4096 ] ) {
42 0 : char path[ PATH_MAX ];
43 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
44 :
45 0 : FILE * fp = fopen( path, "r" );
46 0 : if( FD_UNLIKELY( !fp ) )
47 0 : FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
48 0 : if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
49 0 : FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
50 0 : if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
51 0 : if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
52 0 : if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
53 0 : if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
54 0 : if( FD_UNLIKELY( fclose( fp ) ) )
55 0 : FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
56 0 : output[ strlen( output ) - 1 ] = '\0';
57 0 : }
58 :
59 : static void
60 : init_device( const char * device,
61 0 : uint combined_channel_count ) {
62 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
63 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
64 :
65 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
66 0 : if( FD_UNLIKELY( sock < 0 ) )
67 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
68 0 : errno, fd_io_strerror( errno ) ));
69 :
70 0 : struct ethtool_channels channels = {0};
71 0 : channels.cmd = ETHTOOL_GCHANNELS;
72 :
73 0 : struct ifreq ifr = {0};
74 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE-1 );
75 0 : ifr.ifr_data = (void *)&channels;
76 :
77 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) )
78 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
79 0 : errno, fd_io_strerror( errno ) ));
80 :
81 0 : channels.cmd = ETHTOOL_SCHANNELS;
82 0 : if( channels.max_combined ) {
83 0 : channels.combined_count = combined_channel_count;
84 0 : channels.rx_count = 0;
85 0 : channels.tx_count = 0;
86 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s combined %u`", device, combined_channel_count ));
87 0 : } else {
88 0 : channels.combined_count = 0;
89 0 : channels.rx_count = combined_channel_count;
90 0 : channels.tx_count = combined_channel_count;
91 0 : FD_LOG_NOTICE(( "RUN: `ethtool --set-channels %s rx %u tx %u`", device, combined_channel_count, combined_channel_count ));
92 0 : }
93 :
94 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
95 0 : if( FD_LIKELY( errno == EBUSY ) )
96 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s). "
97 0 : "This is most commonly caused by an issue with the Intel ice driver on certain versions "
98 0 : "of Ubuntu. If you are using the ice driver, `sudo dmesg | grep %s` contains "
99 0 : "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
100 0 : "blacklisting the irdma kernel module.",
101 0 : errno, fd_io_strerror( errno ), device ));
102 0 : else
103 0 : FD_LOG_ERR(( "error configuring network device, ioctl(SIOCETHTOOL,ETHTOOL_SCHANNELS) failed (%i-%s)",
104 0 : errno, fd_io_strerror( errno ) ));
105 0 : }
106 :
107 :
108 0 : if( FD_UNLIKELY( close( sock ) ) )
109 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
110 0 : }
111 :
112 : static void
113 0 : init( config_t * const config ) {
114 : /* we need one channel for both TX and RX on the NIC for each QUIC
115 : tile, but the interface probably defaults to one channel total */
116 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
117 : /* if using a bonded device, we need to set channels on the
118 : underlying devices. */
119 0 : char line[ 4096 ];
120 0 : device_read_slaves( config->tiles.net.interface, line );
121 0 : char * saveptr;
122 0 : for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
123 0 : init_device( token, config->layout.net_tile_count );
124 0 : }
125 0 : } else {
126 0 : init_device( config->tiles.net.interface, config->layout.net_tile_count );
127 0 : }
128 0 : }
129 :
130 : static configure_result_t
131 : check_device( const char * device,
132 0 : uint expected_channel_count ) {
133 0 : if( FD_UNLIKELY( strlen( device ) >= IF_NAMESIZE ) ) FD_LOG_ERR(( "device name `%s` is too long", device ));
134 0 : if( FD_UNLIKELY( strlen( device ) == 0 ) ) FD_LOG_ERR(( "device name `%s` is empty", device ));
135 :
136 0 : int sock = socket( AF_INET, SOCK_DGRAM, 0 );
137 0 : if( FD_UNLIKELY( sock < 0 ) )
138 0 : FD_LOG_ERR(( "error configuring network device, socket(AF_INET,SOCK_DGRAM,0) failed (%i-%s)",
139 0 : errno, fd_io_strerror( errno ) ));
140 :
141 0 : struct ethtool_channels channels = {0};
142 0 : channels.cmd = ETHTOOL_GCHANNELS;
143 :
144 0 : struct ifreq ifr = {0};
145 0 : strncpy( ifr.ifr_name, device, IF_NAMESIZE );
146 0 : ifr.ifr_name[ IF_NAMESIZE - 1 ] = '\0'; // silence linter, not needed for correctness
147 0 : ifr.ifr_data = (void *)&channels;
148 :
149 0 : int supports_channels = 1;
150 0 : uint current_channels = 0;
151 0 : if( FD_UNLIKELY( ioctl( sock, SIOCETHTOOL, &ifr ) ) ) {
152 0 : if( FD_LIKELY( errno == EOPNOTSUPP ) ) {
153 : /* network device doesn't support setting number of channels, so
154 : it must always be 1 */
155 0 : supports_channels = 0;
156 0 : current_channels = 1;
157 0 : } else {
158 0 : FD_LOG_ERR(( "error configuring network device `%s`, ioctl(SIOCETHTOOL,ETHTOOL_GCHANNELS) failed (%i-%s)",
159 0 : device, errno, fd_io_strerror( errno ) ));
160 0 : }
161 0 : }
162 :
163 0 : if( FD_UNLIKELY( close( sock ) ) )
164 0 : FD_LOG_ERR(( "error configuring network device, close() socket failed (%i-%s)", errno, fd_io_strerror( errno ) ));
165 :
166 0 : if( channels.combined_count ) {
167 0 : current_channels = channels.combined_count;
168 0 : } else if( channels.rx_count || channels.tx_count ) {
169 0 : if( FD_UNLIKELY( channels.rx_count != channels.tx_count ) ) {
170 0 : NOT_CONFIGURED( "device `%s` has unbalanced channel count: (got %u rx, %u tx, expected %u)",
171 0 : device, channels.rx_count, channels.tx_count, expected_channel_count );
172 0 : }
173 0 : current_channels = channels.rx_count;
174 0 : }
175 :
176 0 : if( FD_UNLIKELY( current_channels != expected_channel_count ) ) {
177 0 : if( FD_UNLIKELY( !supports_channels ) ) {
178 0 : FD_LOG_ERR(( "Network device `%s` does not support setting number of channels, "
179 0 : "but you are running with more than one net tile (expected {%u}), "
180 0 : "and there must be one channel per tile. You can either use a NIC "
181 0 : "that supports multiple channels, or run Firedancer with only one "
182 0 : "net tile. You can configure Firedancer to run with only one QUIC "
183 0 : "tile by setting `layout.net_tile_count` to 1 in your "
184 0 : "configuration file. It is not recommended to do this in production "
185 0 : "as it will limit network performance.",
186 0 : device, expected_channel_count ));
187 0 : } else {
188 0 : NOT_CONFIGURED( "device `%s` does not have right number of channels (got %u but "
189 0 : "expected %u)",
190 0 : device, current_channels, expected_channel_count );
191 0 : }
192 0 : }
193 :
194 0 : CONFIGURE_OK();
195 0 : }
196 :
197 : static configure_result_t
198 0 : check( config_t * const config ) {
199 0 : if( FD_UNLIKELY( device_is_bonded( config->tiles.net.interface ) ) ) {
200 0 : char line[ 4096 ];
201 0 : device_read_slaves( config->tiles.net.interface, line );
202 0 : char * saveptr;
203 0 : for( char * token=strtok_r( line, " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
204 0 : CHECK( check_device( token, config->layout.net_tile_count ) );
205 0 : }
206 0 : } else {
207 0 : CHECK( check_device( config->tiles.net.interface, config->layout.net_tile_count ) );
208 0 : }
209 :
210 0 : CONFIGURE_OK();
211 0 : }
212 :
213 : configure_stage_t fd_cfg_stage_ethtool_channels = {
214 : .name = NAME,
215 : .always_recreate = 0,
216 : .enabled = enabled,
217 : .init_perm = init_perm,
218 : .fini_perm = NULL,
219 : .init = init,
220 : .fini = NULL,
221 : .check = check,
222 : };
223 :
224 : #undef NAME
|