Line data Source code
1 : #include "configure.h"
2 :
3 : #include <errno.h>
4 : #include <stdio.h>
5 : #include <unistd.h>
6 : #include <sys/stat.h>
7 :
8 : #include "fd_ethtool_ioctl.h"
9 :
10 0 : #define NAME "ethtool-channels"
11 :
12 : static int
13 0 : enabled( fd_config_t const * config ) {
14 :
15 : /* if we're running in a network namespace, we configure ethtool on
16 : the virtual device as part of netns setup, not here */
17 0 : if( config->development.netns.enabled ) return 0;
18 :
19 : /* only enable if network stack is XDP */
20 0 : if( 0!=strcmp( config->net.provider, "xdp" ) ) return 0;
21 :
22 0 : return 1;
23 0 : }
24 :
25 : static void
26 : init_perm( fd_cap_chk_t * chk,
27 0 : fd_config_t const * config FD_PARAM_UNUSED ) {
28 0 : fd_cap_chk_root( chk, NAME, "modify network device configuration with ethtool" );
29 0 : }
30 :
31 : static void
32 : fini_perm( fd_cap_chk_t * chk,
33 0 : fd_config_t const * config FD_PARAM_UNUSED ) {
34 0 : fd_cap_chk_root( chk, NAME, "modify network device configuration with ethtool" );
35 0 : }
36 :
37 : static int
38 0 : device_is_bonded( char const * device ) {
39 0 : char path[ PATH_MAX ];
40 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
41 0 : struct stat st;
42 0 : int err = stat( path, &st );
43 0 : if( FD_UNLIKELY( err && errno != ENOENT ) )
44 0 : FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
45 0 : device, path, errno, fd_io_strerror( errno ) ));
46 0 : return !err;
47 0 : }
48 :
49 : static void
50 : device_read_slaves( char const * device,
51 0 : char output[ 4096 ] ) {
52 0 : char path[ PATH_MAX ];
53 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
54 :
55 0 : FILE * fp = fopen( path, "r" );
56 0 : if( FD_UNLIKELY( !fp ) )
57 0 : FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
58 0 : if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
59 0 : FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
60 0 : if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
61 0 : if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
62 0 : if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
63 0 : if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
64 0 : if( FD_UNLIKELY( fclose( fp ) ) )
65 0 : FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
66 0 : output[ strlen( output ) - 1 ] = '\0';
67 0 : }
68 :
69 : static void
70 : init_device( char const * device,
71 0 : fd_config_t const * config ) {
72 0 : int const simple_mode = (0==strcmp( config->net.xdp.rss_queue_mode, "simple" ));
73 0 : int const dedicated_mode = (0==strcmp( config->net.xdp.rss_queue_mode, "dedicated" ));
74 :
75 0 : fd_ethtool_ioctl_t ioc;
76 0 : if( FD_UNLIKELY( &ioc != fd_ethtool_ioctl_init( &ioc, device ) ) )
77 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
78 :
79 : /* This should happen first, otherwise changing the number of channels may fail */
80 0 : FD_TEST( 0==fd_ethtool_ioctl_rxfh_set_default( &ioc ) );
81 :
82 0 : uint const num_channels = simple_mode ? config->layout.net_tile_count : 0 /* maximum allowed */;
83 0 : int ret = fd_ethtool_ioctl_channels_set_num( &ioc, num_channels );
84 0 : if( FD_UNLIKELY( ret != 0 ) ) {
85 0 : if( FD_LIKELY( ret == EBUSY ) )
86 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to set number of channels. "
87 0 : "This is most commonly caused by an issue with the Intel ice driver on certain versions "
88 0 : "of Ubuntu. If you are using the ice driver, `sudo dmesg | grep %s` contains "
89 0 : "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
90 0 : "blacklisting the irdma kernel module.", device, device ));
91 0 : else
92 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to set number of channels", device ));
93 0 : }
94 :
95 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_clear( &ioc ) );
96 :
97 0 : if( dedicated_mode ) {
98 0 : if( FD_UNLIKELY( config->layout.net_tile_count != 1 ) )
99 0 : FD_LOG_ERR(( "`layout.net_tile_count` must be 1 when `net.xdp.rss_queue_mode` is \"dedicated\"" ));
100 :
101 : /* Remove queue 0 from the rxfh table. This queue is dedicated for xdp. */
102 0 : if( FD_UNLIKELY( 0!=fd_ethtool_ioctl_rxfh_set_suffix( &ioc, 1 ) ) )
103 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to isolate queue zero. Try `net.xdp.rss_queue_mode=\"simple\"`", device ));
104 :
105 0 : if( FD_UNLIKELY( 0!=fd_ethtool_ioctl_feature_set( &ioc, FD_ETHTOOL_FEATURE_NTUPLE, 1 ) ) )
106 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to enable ntuple feature. Try `net.xdp.rss_queue_mode=\"simple\"`", device ));
107 :
108 : /* FIXME Centrally define listen port list to avoid this configure
109 : stage from going out of sync with port mappings. */
110 0 : int error = 0;
111 0 : uint rule_idx = 0;
112 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.shred.shred_listen_port, 0 ) );
113 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.quic.quic_transaction_listen_port, 0 ) );
114 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.quic.regular_transaction_listen_port, 0 ) );
115 0 : if( config->is_firedancer ) {
116 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->gossip.port, 0 ) );
117 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.repair.repair_intake_listen_port, 0 ) );
118 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.repair.repair_serve_listen_port, 0 ) );
119 0 : error |= ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.send.send_src_port, 0 ) );
120 0 : }
121 0 : if( FD_UNLIKELY( error ) )
122 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to install ntuple rules. Try `net.xdp.rss_queue_mode=\"simple\"`", device ));
123 0 : }
124 :
125 0 : fd_ethtool_ioctl_fini( &ioc );
126 0 : }
127 :
128 : static void
129 0 : init( fd_config_t const * config ) {
130 : /* we need one channel for both TX and RX on the NIC for each net
131 : tile, but the interface probably defaults to one channel total */
132 0 : if( FD_UNLIKELY( device_is_bonded( config->net.interface ) ) ) {
133 : /* if using a bonded device, we need to set channels on the
134 : underlying devices. */
135 0 : char line[ 4096 ];
136 0 : device_read_slaves( config->net.interface, line );
137 0 : char * saveptr;
138 0 : for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
139 0 : init_device( token, config );
140 0 : }
141 0 : } else {
142 0 : init_device( config->net.interface, config );
143 0 : }
144 0 : }
145 :
146 : static configure_result_t
147 : check_device( char const * device,
148 0 : fd_config_t const * config ) {
149 0 : int const simple_mode = (0==strcmp( config->net.xdp.rss_queue_mode, "simple" ));
150 0 : int const dedicated_mode = (0==strcmp( config->net.xdp.rss_queue_mode, "dedicated" ));
151 :
152 0 : fd_ethtool_ioctl_t ioc;
153 0 : if( FD_UNLIKELY( &ioc != fd_ethtool_ioctl_init( &ioc, device ) ) )
154 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
155 :
156 0 : int error = 0; /* is anything not fully configured according to the current mode */
157 0 : int modified = 0; /* is anything changed from the default (fini'd) state */
158 :
159 0 : fd_ethtool_ioctl_channels_t channels;
160 0 : FD_TEST( 0==fd_ethtool_ioctl_channels_get_num( &ioc, &channels ) );
161 0 : if( channels.current != channels.max ) {
162 0 : modified = 1;
163 0 : if( dedicated_mode ) {
164 0 : error = 1;
165 0 : FD_LOG_WARNING(( "device `%s` does not have right number of channels (got %u but "
166 0 : "expected %u)",
167 0 : device, channels.current, channels.max ));
168 0 : }
169 0 : }
170 0 : if( simple_mode ) {
171 0 : if( FD_UNLIKELY( channels.current != config->layout.net_tile_count ) ) {
172 0 : error = 1;
173 0 : if( FD_UNLIKELY( !channels.supported ) ) {
174 0 : FD_LOG_ERR(( "Network device `%s` does not support setting number of channels, "
175 0 : "but you are running with more than one net tile (expected {%u}), "
176 0 : "and there must be one channel per tile. You can either use a NIC "
177 0 : "that supports multiple channels, or run Firedancer with only one "
178 0 : "net tile. You can configure Firedancer to run with only one net "
179 0 : "tile by setting `layout.net_tile_count` to 1 in your "
180 0 : "configuration file.",
181 0 : device, config->layout.net_tile_count ));
182 0 : } else {
183 0 : FD_LOG_WARNING(( "device `%s` does not have right number of channels (got %u but "
184 0 : "expected %u)",
185 0 : device, channels.current, config->layout.net_tile_count ));
186 0 : }
187 0 : }
188 0 : }
189 :
190 0 : uint rxfh_table[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
191 0 : uint rxfh_table_ele_cnt;
192 0 : FD_TEST( 0==fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table, &rxfh_table_ele_cnt ) );
193 0 : if( rxfh_table_ele_cnt==0 ) {
194 0 : if( dedicated_mode ) {
195 0 : error = 1;
196 0 : FD_LOG_WARNING(( "device `%s` does not have any rxfh table installed", device ));
197 0 : }
198 0 : } else {
199 0 : int rxfh_error = 0;
200 0 : uint default_queue = 0;
201 0 : uint configured_queue = dedicated_mode ? 1 : 0;
202 0 : for( uint j=0u; j<rxfh_table_ele_cnt; j++) {
203 0 : modified |= (rxfh_table[ j ] != default_queue++);
204 0 : rxfh_error |= (rxfh_table[ j ] != configured_queue++);
205 0 : if( default_queue >= channels.current )
206 0 : default_queue = 0;
207 0 : if( configured_queue >= channels.current )
208 0 : configured_queue = dedicated_mode ? 1 : 0;
209 0 : }
210 0 : if( FD_UNLIKELY( rxfh_error ) ) {
211 0 : error = 1;
212 0 : FD_LOG_WARNING(( "device `%s` does not have the correct rxfh table installed", device ));
213 0 : }
214 0 : }
215 :
216 0 : if( dedicated_mode ) {
217 0 : int ntuple_feature_active;
218 0 : FD_TEST( 0==fd_ethtool_ioctl_feature_test( &ioc, FD_ETHTOOL_FEATURE_NTUPLE, &ntuple_feature_active ) );
219 0 : if( !ntuple_feature_active ) {
220 0 : error = 1;
221 0 : FD_LOG_WARNING(( "device `%s` has incorrect ntuple feature flag, should be enabled", device ));
222 0 : }
223 0 : }
224 :
225 0 : int ntuple_rules_empty;
226 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty ) );
227 0 : if( !ntuple_rules_empty ) {
228 0 : modified = 1;
229 0 : if( simple_mode ) {
230 0 : error = 1;
231 0 : FD_LOG_WARNING(( "device `%s` should not have ntuple rules", device ));
232 0 : }
233 0 : }
234 0 : if( dedicated_mode ) {
235 : /* FIXME Centrally define listen port list to avoid this configure
236 : stage from going out of sync with port mappings. */
237 0 : uint num_ports = 0;
238 0 : ushort ports[ 32 ];
239 0 : ports[ num_ports++ ] = config->tiles.shred.shred_listen_port;
240 0 : ports[ num_ports++ ] = config->tiles.quic.quic_transaction_listen_port;
241 0 : ports[ num_ports++ ] = config->tiles.quic.regular_transaction_listen_port;
242 0 : if( config->is_firedancer ) {
243 0 : ports[ num_ports++ ] = config->gossip.port;
244 0 : ports[ num_ports++ ] = config->tiles.repair.repair_intake_listen_port;
245 0 : ports[ num_ports++ ] = config->tiles.repair.repair_serve_listen_port;
246 0 : ports[ num_ports++ ] = config->tiles.send.send_src_port;
247 0 : }
248 0 : int ports_valid;
249 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, ports, num_ports, 0, &ports_valid ));
250 0 : if( FD_UNLIKELY( !ports_valid ) ) {
251 0 : error = 1;
252 0 : FD_LOG_WARNING(( "device `%s` has incorrect ntuple rules", device ));
253 0 : }
254 0 : }
255 :
256 0 : fd_ethtool_ioctl_fini( &ioc );
257 :
258 0 : if( !error )
259 0 : CONFIGURE_OK();
260 0 : if( modified )
261 0 : PARTIALLY_CONFIGURED( "device `%s` has partial ethtool-channels network configuration", device );
262 0 : NOT_CONFIGURED( "device `%s` missing ethtool-channels network configuration", device );
263 0 : }
264 :
265 : static configure_result_t
266 0 : check( fd_config_t const * config ) {
267 0 : if( FD_UNLIKELY( device_is_bonded( config->net.interface ) ) ) {
268 0 : char line[ 4096 ];
269 0 : device_read_slaves( config->net.interface, line );
270 0 : char * saveptr;
271 0 : for( char * token=strtok_r( line, " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
272 0 : CHECK( check_device( token, config ) );
273 0 : }
274 0 : } else {
275 0 : CHECK( check_device( config->net.interface, config ) );
276 0 : }
277 :
278 0 : CONFIGURE_OK();
279 0 : }
280 :
281 : static int
282 0 : fini_device( char const * device ) {
283 0 : int error = 0;
284 :
285 0 : fd_ethtool_ioctl_t ioc;
286 0 : if( FD_UNLIKELY( &ioc != fd_ethtool_ioctl_init( &ioc, device ) ) )
287 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
288 :
289 : /* It may be the case for certain devices that the default state is
290 : the same as the init'd state (in simple mode). In this case the
291 : following fini commands will all be noops, which is fine. But we
292 : need to return 0 so that the configure stage logic does not
293 : consider this to be an error. We compare the state before and
294 : after to see if anything was changed by fini. */
295 0 : fd_ethtool_ioctl_channels_t channels_orig;
296 0 : error |= (0!=fd_ethtool_ioctl_channels_get_num( &ioc, &channels_orig ));
297 0 : uint rxfh_table_orig[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
298 0 : uint rxfh_table_orig_ele_cnt;
299 0 : error |= (0!=fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table_orig, &rxfh_table_orig_ele_cnt ));
300 0 : int ntuple_rules_empty_orig;
301 0 : error |= (0!=fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty_orig ));
302 0 : if( FD_UNLIKELY( error ) )
303 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to determine initial state", device ));
304 :
305 : /* This should happen first, otherwise changing the number of channels may fail */
306 0 : error |= (0!=fd_ethtool_ioctl_rxfh_set_default( &ioc ));
307 :
308 0 : error |= (0!=fd_ethtool_ioctl_channels_set_num( &ioc, 0 /* max */ ));
309 :
310 : /* Some drivers (i40e) do not always evenly redistribute the RXFH table
311 : when increasing the channel count, so we run this again just in case. */
312 0 : error |= (0!=fd_ethtool_ioctl_rxfh_set_default( &ioc ));
313 :
314 : /* Note: We leave the ntuple feature flag as-is in fini */
315 0 : error |= (0!=fd_ethtool_ioctl_ntuple_clear( &ioc ));
316 :
317 0 : if( FD_UNLIKELY( error ) )
318 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to set to default state", device ));
319 :
320 0 : fd_ethtool_ioctl_channels_t channels_new;
321 0 : error |= (0!=fd_ethtool_ioctl_channels_get_num( &ioc, &channels_new ));
322 0 : uint rxfh_table_new[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
323 0 : uint rxfh_table_new_ele_cnt;
324 0 : error |= (0!=fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table_new, &rxfh_table_new_ele_cnt ));
325 0 : int ntuple_rules_empty_new;
326 0 : error |= (0!=fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty_new ));
327 0 : if( FD_UNLIKELY( error ) )
328 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to determine final state", device ));
329 :
330 0 : fd_ethtool_ioctl_fini( &ioc );
331 :
332 0 : int modified = (0!=memcmp( &channels_orig, &channels_new, sizeof(fd_ethtool_ioctl_channels_t) )) ||
333 0 : (rxfh_table_orig_ele_cnt != rxfh_table_new_ele_cnt) ||
334 0 : (0!=memcmp( rxfh_table_orig, rxfh_table_new, rxfh_table_orig_ele_cnt * sizeof(uint) )) ||
335 0 : (ntuple_rules_empty_orig!=ntuple_rules_empty_new);
336 0 : return modified;
337 0 : }
338 :
339 : static int
340 : fini( fd_config_t const * config,
341 0 : int pre_init FD_PARAM_UNUSED ) {
342 0 : int done = 0;
343 0 : if( FD_UNLIKELY( device_is_bonded( config->net.interface ) ) ) {
344 0 : char line[ 4096 ];
345 0 : device_read_slaves( config->net.interface, line );
346 0 : char * saveptr;
347 0 : for( char * token=strtok_r( line , " \t", &saveptr ); token!=NULL; token=strtok_r( NULL, " \t", &saveptr ) ) {
348 0 : done |= fini_device( token );
349 0 : }
350 0 : } else {
351 0 : done |= fini_device( config->net.interface );
352 0 : }
353 0 : return done;
354 0 : }
355 :
356 : configure_stage_t fd_cfg_stage_ethtool_channels = {
357 : .name = NAME,
358 : .always_recreate = 0,
359 : .enabled = enabled,
360 : .init_perm = init_perm,
361 : .fini_perm = fini_perm,
362 : .init = init,
363 : .fini = fini,
364 : .check = check,
365 : };
366 :
367 : #undef NAME
|