Line data Source code
1 : #include "configure.h"
2 :
3 : #include <errno.h>
4 : #include <stdio.h>
5 : #include <unistd.h>
6 : #include <sys/stat.h>
7 :
8 : #include "fd_ethtool_ioctl.h"
9 :
10 0 : #define NAME "ethtool-channels"
11 :
12 : static int
13 0 : enabled( fd_config_t const * config ) {
14 :
15 : /* if we're running in a network namespace, we configure ethtool on
16 : the virtual device as part of netns setup, not here */
17 0 : if( config->development.netns.enabled ) return 0;
18 :
19 : /* only enable if network stack is XDP */
20 0 : if( 0!=strcmp( config->net.provider, "xdp" ) ) return 0;
21 :
22 0 : return 1;
23 0 : }
24 :
25 : static void
26 : init_perm( fd_cap_chk_t * chk,
27 0 : fd_config_t const * config FD_PARAM_UNUSED ) {
28 0 : fd_cap_chk_root( chk, NAME, "modify network device configuration with ethtool" );
29 0 : }
30 :
31 : static void
32 : fini_perm( fd_cap_chk_t * chk,
33 0 : fd_config_t const * config FD_PARAM_UNUSED ) {
34 0 : fd_cap_chk_root( chk, NAME, "modify network device configuration with ethtool" );
35 0 : }
36 :
37 : static int
38 0 : device_is_bonded( char const * device ) {
39 0 : char path[ PATH_MAX ];
40 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding", device ) );
41 0 : struct stat st;
42 0 : int err = stat( path, &st );
43 0 : if( FD_UNLIKELY( err && errno != ENOENT ) )
44 0 : FD_LOG_ERR(( "error checking if device `%s` is bonded, stat(%s) failed (%i-%s)",
45 0 : device, path, errno, fd_io_strerror( errno ) ));
46 0 : return !err;
47 0 : }
48 :
49 : static void
50 : device_read_slaves( char const * device,
51 : char output[ 4096 ],
52 0 : char const * devices[ 16 ] ) {
53 0 : char path[ PATH_MAX ];
54 0 : FD_TEST( fd_cstr_printf_check( path, PATH_MAX, NULL, "/sys/class/net/%s/bonding/slaves", device ) );
55 :
56 0 : FILE * fp = fopen( path, "r" );
57 0 : if( FD_UNLIKELY( !fp ) )
58 0 : FD_LOG_ERR(( "error configuring network device, fopen(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
59 0 : if( FD_UNLIKELY( !fgets( output, 4096, fp ) ) )
60 0 : FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
61 0 : if( FD_UNLIKELY( feof( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (EOF)", path ));
62 0 : if( FD_UNLIKELY( ferror( fp ) ) ) FD_LOG_ERR(( "error configuring network device, fgets(%s) failed (error)", path ));
63 0 : if( FD_UNLIKELY( strlen( output ) == 4095 ) ) FD_LOG_ERR(( "line too long in `%s`", path ));
64 0 : if( FD_UNLIKELY( strlen( output ) == 0 ) ) FD_LOG_ERR(( "line empty in `%s`", path ));
65 0 : if( FD_UNLIKELY( fclose( fp ) ) )
66 0 : FD_LOG_ERR(( "error configuring network device, fclose(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
67 0 : output[ strlen( output ) - 1 ] = '\0';
68 :
69 0 : ulong num = 0UL;
70 0 : char * saveptr;
71 0 : for( char * token=strtok_r( output, " \t", &saveptr ); token!=NULL && num<15; token=strtok_r( NULL, " \t", &saveptr ) ) {
72 0 : devices[ num++ ] = token;
73 0 : }
74 0 : devices[ num ] = NULL;
75 0 : FD_TEST( devices[ 0 ]!=NULL );
76 0 : }
77 :
78 : /* Attempts to initialize the device in simple or dedicated mode. If
79 : strict is true, FD_LOG_ERR's on failure. Otherwise, returns 1 on
80 : failure. Returns 0 on success. */
81 : static int
82 : init_device( char const * device,
83 : fd_config_t const * config,
84 : int dedicated_mode,
85 0 : int strict ) {
86 0 : FD_TEST( dedicated_mode || strict );
87 :
88 0 : fd_ethtool_ioctl_t ioc;
89 0 : if( FD_UNLIKELY( &ioc != fd_ethtool_ioctl_init( &ioc, device ) ) )
90 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
91 :
92 : /* This should happen first, otherwise changing the number of channels may fail */
93 0 : FD_TEST( 0==fd_ethtool_ioctl_rxfh_set_default( &ioc ) );
94 :
95 0 : uint const num_channels = !dedicated_mode ? config->layout.net_tile_count : 0 /* maximum allowed */;
96 0 : int ret = fd_ethtool_ioctl_channels_set_num( &ioc, num_channels );
97 0 : if( FD_UNLIKELY( 0!=ret ) ) {
98 0 : if( strict ) {
99 0 : if( FD_LIKELY( ret == EBUSY ) )
100 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to set number of channels. "
101 0 : "This is most commonly caused by an issue with the Intel ice driver on certain versions "
102 0 : "of Ubuntu. If you are using the ice driver, `sudo dmesg | grep %s` contains "
103 0 : "messages about RDMA, and you do not need RDMA, try running `rmmod irdma` and/or "
104 0 : "blacklisting the irdma kernel module.", device, device ));
105 0 : else
106 0 : FD_LOG_ERR(( "error configuring network device (%s), failed to set number of channels", device ));
107 0 : }
108 0 : return 1;
109 0 : }
110 :
111 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_clear( &ioc ) );
112 :
113 0 : if( dedicated_mode ) {
114 : /* Remove queue 0 from the rxfh table. This queue is dedicated for xdp. */
115 0 : if( FD_UNLIKELY( 0!=fd_ethtool_ioctl_rxfh_set_suffix( &ioc, 1 ) ) ) {
116 0 : if( strict ) FD_LOG_ERR(( "error configuring network device (%s), failed to isolate queue zero. Try `net.xdp.rss_queue_mode=\"simple\"`", device ));
117 0 : else return 1;
118 0 : }
119 :
120 0 : if( FD_UNLIKELY( 0!=fd_ethtool_ioctl_feature_set( &ioc, FD_ETHTOOL_FEATURE_NTUPLE, 1 ) ) ) {
121 0 : if( strict ) FD_LOG_ERR(( "error configuring network device (%s), failed to enable ntuple feature. Try `net.xdp.rss_queue_mode=\"simple\"`", device ));
122 0 : else return 1;
123 0 : }
124 :
125 : /* FIXME Centrally define listen port list to avoid this configure
126 : stage from going out of sync with port mappings. */
127 0 : uint rule_idx = 0;
128 0 : int error =
129 0 : ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.shred.shred_listen_port, 0 ) )
130 0 : || ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.quic.quic_transaction_listen_port, 0 ) )
131 0 : || ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.quic.regular_transaction_listen_port, 0 ) );
132 0 : if( !error && config->is_firedancer ) {
133 0 : error =
134 0 : ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->gossip.port, 0 ) )
135 0 : || ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.repair.repair_intake_listen_port, 0 ) )
136 0 : || ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.repair.repair_serve_listen_port, 0 ) )
137 0 : || ( 0!=fd_ethtool_ioctl_ntuple_set_udp_dport( &ioc, rule_idx++, config->tiles.send.send_src_port, 0 ) );
138 0 : }
139 0 : if( FD_UNLIKELY( error ) ) {
140 0 : if( strict ) FD_LOG_ERR(( "error configuring network device (%s), failed to install ntuple rules. Try `net.xdp.rss_queue_mode=\"simple\"`", device ));
141 0 : else return 1;
142 0 : }
143 0 : }
144 :
145 0 : fd_ethtool_ioctl_fini( &ioc );
146 0 : return 0;
147 0 : }
148 :
149 : static void
150 0 : init( fd_config_t const * config ) {
151 0 : int only_dedicated =
152 0 : (0==strcmp( config->net.xdp.rss_queue_mode, "dedicated" ));
153 0 : int try_dedicated = only_dedicated ||
154 0 : (0==strcmp( config->net.xdp.rss_queue_mode, "auto" ) && 1UL==config->layout.net_tile_count );
155 0 : if( FD_UNLIKELY( only_dedicated && 1UL!=config->layout.net_tile_count ) )
156 0 : FD_LOG_ERR(( "`layout.net_tile_count` must be 1 when `net.xdp.rss_queue_mode` is \"dedicated\"" ));
157 :
158 : /* if using a bonded device, we need to set channels on the
159 : underlying devices. */
160 0 : int is_bonded = device_is_bonded( config->net.interface );
161 0 : char line[ 4096 ];
162 0 : char const * bond_devices[ 16 ];
163 0 : if( is_bonded ) device_read_slaves( config->net.interface, line, bond_devices );
164 :
165 : /* If the mode was auto, we will try to init in dedicated mode but will
166 : not fail the stage if this is not successful. If the mode was
167 : dedicated, we will require success. */
168 0 : if( try_dedicated ) {
169 0 : int failed = 0;
170 0 : if( is_bonded ) {
171 0 : for( ulong i=0UL; !failed && bond_devices[ i ]!=NULL; i++ ) {
172 0 : failed = init_device( bond_devices[ i ], config, 1, only_dedicated );
173 0 : }
174 0 : } else {
175 0 : failed = init_device( config->net.interface, config, 1, only_dedicated );
176 0 : }
177 0 : if( !failed ) return;
178 0 : FD_TEST( !only_dedicated );
179 0 : FD_LOG_WARNING(( "error configuring network device (%s), rss_queue_mode \"auto\" attempted"
180 0 : " \"dedicated\" configuration but falling back to \"simple\".", config->net.interface ));
181 0 : }
182 :
183 : /* Require success for simple mode, either configured or as fallback */
184 0 : if( is_bonded ) {
185 0 : for( ulong i=0UL; bond_devices[ i ]!=NULL; i++ ) {
186 0 : init_device( bond_devices[ i ], config, 0, 1 );
187 0 : }
188 0 : } else {
189 0 : init_device( config->net.interface, config, 0, 1 );
190 0 : }
191 0 : }
192 :
193 : /* Returns whether anything is changed from the default (fini'd) state */
194 : static int
195 0 : check_device_is_modified( char const * device ) {
196 0 : fd_ethtool_ioctl_t ioc __attribute__((cleanup(fd_ethtool_ioctl_fini)));
197 0 : if( FD_UNLIKELY( &ioc!=fd_ethtool_ioctl_init( &ioc, device ) ) )
198 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
199 :
200 0 : fd_ethtool_ioctl_channels_t channels;
201 0 : FD_TEST( 0==fd_ethtool_ioctl_channels_get_num( &ioc, &channels ) );
202 0 : if( channels.current!=channels.max ) return 1;
203 :
204 0 : uint rxfh_table[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
205 0 : uint rxfh_table_ele_cnt;
206 0 : FD_TEST( 0==fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table, &rxfh_table_ele_cnt ) );
207 0 : for( uint j=0U, q=0U; j<rxfh_table_ele_cnt; j++) {
208 0 : if( rxfh_table[ j ]!=q++ ) return 1;
209 0 : if( q>=channels.current ) q = 0;
210 0 : }
211 :
212 0 : int ntuple_rules_empty;
213 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty ) );
214 0 : if( !ntuple_rules_empty ) return 1;
215 :
216 0 : return 0;
217 0 : }
218 :
219 : static int
220 : check_device_is_configured( char const * device,
221 : fd_config_t const * config,
222 0 : int dedicated_mode ) {
223 0 : fd_ethtool_ioctl_t ioc __attribute__((cleanup(fd_ethtool_ioctl_fini)));
224 0 : if( FD_UNLIKELY( &ioc != fd_ethtool_ioctl_init( &ioc, device ) ) )
225 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
226 :
227 0 : fd_ethtool_ioctl_channels_t channels;
228 0 : FD_TEST( 0==fd_ethtool_ioctl_channels_get_num( &ioc, &channels ) );
229 0 : if( channels.current!=(dedicated_mode ? channels.max : config->layout.net_tile_count) ) return 0;
230 :
231 0 : uint rxfh_table[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
232 0 : uint rxfh_table_ele_cnt;
233 0 : FD_TEST( 0==fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table, &rxfh_table_ele_cnt ) );
234 0 : int rxfh_error = (dedicated_mode && 0U==rxfh_table_ele_cnt);
235 0 : for( uint j=0U, q=!!dedicated_mode; !rxfh_error && j<rxfh_table_ele_cnt; j++) {
236 0 : rxfh_error = (rxfh_table[ j ]!=q++);
237 0 : if( q>=channels.current ) q = !!dedicated_mode;
238 0 : }
239 0 : if( rxfh_error ) return 0;
240 :
241 0 : if( dedicated_mode ) {
242 0 : int ntuple_feature_active;
243 0 : FD_TEST( 0==fd_ethtool_ioctl_feature_test( &ioc, FD_ETHTOOL_FEATURE_NTUPLE, &ntuple_feature_active ) );
244 0 : if( !ntuple_feature_active ) return 0;
245 0 : }
246 :
247 0 : if( !dedicated_mode ) {
248 0 : int ntuple_rules_empty;
249 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty ) );
250 0 : if( !ntuple_rules_empty ) return 0;
251 0 : } else {
252 : /* FIXME Centrally define listen port list to avoid this configure
253 : stage from going out of sync with port mappings. */
254 0 : uint num_ports = 0;
255 0 : ushort ports[ 32 ];
256 0 : ports[ num_ports++ ] = config->tiles.shred.shred_listen_port;
257 0 : ports[ num_ports++ ] = config->tiles.quic.quic_transaction_listen_port;
258 0 : ports[ num_ports++ ] = config->tiles.quic.regular_transaction_listen_port;
259 0 : if( config->is_firedancer ) {
260 0 : ports[ num_ports++ ] = config->gossip.port;
261 0 : ports[ num_ports++ ] = config->tiles.repair.repair_intake_listen_port;
262 0 : ports[ num_ports++ ] = config->tiles.repair.repair_serve_listen_port;
263 0 : ports[ num_ports++ ] = config->tiles.send.send_src_port;
264 0 : }
265 0 : int ports_valid;
266 0 : FD_TEST( 0==fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, ports, num_ports, 0, &ports_valid ));
267 0 : if( !ports_valid ) return 0;
268 0 : }
269 :
270 0 : return 1;
271 0 : }
272 :
273 : static configure_result_t
274 0 : check( fd_config_t const * config ) {
275 0 : int only_dedicated =
276 0 : (0==strcmp( config->net.xdp.rss_queue_mode, "dedicated" ));
277 0 : int check_dedicated = only_dedicated ||
278 0 : (0==strcmp( config->net.xdp.rss_queue_mode, "auto" ) && 1UL==config->layout.net_tile_count );
279 :
280 0 : int is_bonded = device_is_bonded( config->net.interface );
281 0 : char line[ 4096 ];
282 0 : char const * bond_devices[ 16 ];
283 0 : if( is_bonded ) device_read_slaves( config->net.interface, line, bond_devices );
284 :
285 0 : if( check_dedicated ) {
286 0 : int is_configured = 1;
287 0 : if( is_bonded ) {
288 0 : for( ulong i=0UL; is_configured && bond_devices[ i ]!=NULL; i++ ) {
289 0 : is_configured = check_device_is_configured( bond_devices[ i ], config, 1 );
290 0 : }
291 0 : } else {
292 0 : is_configured = check_device_is_configured( config->net.interface, config, 1 );
293 0 : }
294 0 : if( is_configured ) CONFIGURE_OK();
295 0 : }
296 :
297 0 : if( !only_dedicated ) {
298 0 : int is_configured = 1;
299 0 : if( is_bonded ) {
300 0 : for( ulong i=0UL; is_configured && bond_devices[ i ]!=NULL; i++ ) {
301 0 : is_configured = check_device_is_configured( bond_devices[ i ], config, 0 );
302 0 : }
303 0 : } else {
304 0 : is_configured = check_device_is_configured( config->net.interface, config, 0 );
305 0 : }
306 0 : if( is_configured ) CONFIGURE_OK();
307 0 : }
308 :
309 0 : int is_modified = 0;
310 0 : if( is_bonded ) {
311 0 : for( ulong i=0UL; !is_modified && bond_devices[ i ]!=NULL; i++ ) {
312 0 : is_modified = check_device_is_modified( bond_devices[ i ] );
313 0 : }
314 0 : } else {
315 0 : is_modified = check_device_is_modified( config->net.interface );
316 0 : }
317 0 : if( is_modified )
318 0 : PARTIALLY_CONFIGURED( "device `%s` has partial ethtool-channels network configuration", config->net.interface );
319 :
320 0 : NOT_CONFIGURED( "device `%s` missing ethtool-channels network configuration", config->net.interface );
321 0 : }
322 :
323 : static int
324 0 : fini_device( char const * device ) {
325 0 : int error = 0;
326 :
327 0 : fd_ethtool_ioctl_t ioc;
328 0 : if( FD_UNLIKELY( &ioc != fd_ethtool_ioctl_init( &ioc, device ) ) )
329 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to init ethtool ioctl", device ));
330 :
331 : /* It may be the case for certain devices that the default state is
332 : the same as the init'd state (in simple mode). In this case the
333 : following fini commands will all be noops, which is fine. But we
334 : need to return 0 so that the configure stage logic does not
335 : consider this to be an error. We compare the state before and
336 : after to see if anything was changed by fini. */
337 0 : fd_ethtool_ioctl_channels_t channels_orig;
338 0 : error |= (0!=fd_ethtool_ioctl_channels_get_num( &ioc, &channels_orig ));
339 0 : uint rxfh_table_orig[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
340 0 : uint rxfh_table_orig_ele_cnt;
341 0 : error |= (0!=fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table_orig, &rxfh_table_orig_ele_cnt ));
342 0 : int ntuple_rules_empty_orig;
343 0 : error |= (0!=fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty_orig ));
344 0 : if( FD_UNLIKELY( error ) )
345 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to determine initial state", device ));
346 :
347 : /* This should happen first, otherwise changing the number of channels may fail */
348 0 : error |= (0!=fd_ethtool_ioctl_rxfh_set_default( &ioc ));
349 :
350 0 : error |= (0!=fd_ethtool_ioctl_channels_set_num( &ioc, 0 /* max */ ));
351 :
352 : /* Some drivers (i40e) do not always evenly redistribute the RXFH table
353 : when increasing the channel count, so we run this again just in case. */
354 0 : error |= (0!=fd_ethtool_ioctl_rxfh_set_default( &ioc ));
355 :
356 : /* Note: We leave the ntuple feature flag as-is in fini */
357 0 : error |= (0!=fd_ethtool_ioctl_ntuple_clear( &ioc ));
358 :
359 0 : if( FD_UNLIKELY( error ) )
360 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to set to default state", device ));
361 :
362 0 : fd_ethtool_ioctl_channels_t channels_new;
363 0 : error |= (0!=fd_ethtool_ioctl_channels_get_num( &ioc, &channels_new ));
364 0 : uint rxfh_table_new[ FD_ETHTOOL_MAX_RXFH_TABLE_CNT ] = { 0 };
365 0 : uint rxfh_table_new_ele_cnt;
366 0 : error |= (0!=fd_ethtool_ioctl_rxfh_get_table( &ioc, rxfh_table_new, &rxfh_table_new_ele_cnt ));
367 0 : int ntuple_rules_empty_new;
368 0 : error |= (0!=fd_ethtool_ioctl_ntuple_validate_udp_dport( &ioc, NULL, 0, 0, &ntuple_rules_empty_new ));
369 0 : if( FD_UNLIKELY( error ) )
370 0 : FD_LOG_ERR(( "error configuring network device (%s), unable to determine final state", device ));
371 :
372 0 : fd_ethtool_ioctl_fini( &ioc );
373 :
374 0 : int modified = (0!=memcmp( &channels_orig, &channels_new, sizeof(fd_ethtool_ioctl_channels_t) )) ||
375 0 : (rxfh_table_orig_ele_cnt != rxfh_table_new_ele_cnt) ||
376 0 : (0!=memcmp( rxfh_table_orig, rxfh_table_new, rxfh_table_orig_ele_cnt * sizeof(uint) )) ||
377 0 : (ntuple_rules_empty_orig!=ntuple_rules_empty_new);
378 0 : return modified;
379 0 : }
380 :
381 : static int
382 : fini( fd_config_t const * config,
383 0 : int pre_init FD_PARAM_UNUSED ) {
384 0 : int done = 0;
385 0 : if( FD_UNLIKELY( device_is_bonded( config->net.interface ) ) ) {
386 0 : char line[ 4096 ];
387 0 : char const * bond_devices[ 16 ];
388 0 : device_read_slaves( config->net.interface, line, bond_devices );
389 0 : for( ulong i=0UL; bond_devices[ i ]!=NULL; i++ ) {
390 0 : done |= fini_device( bond_devices[ i ] );
391 0 : }
392 0 : } else {
393 0 : done = fini_device( config->net.interface );
394 0 : }
395 0 : return done;
396 0 : }
397 :
398 : configure_stage_t fd_cfg_stage_ethtool_channels = {
399 : .name = NAME,
400 : .always_recreate = 0,
401 : .enabled = enabled,
402 : .init_perm = init_perm,
403 : .fini_perm = fini_perm,
404 : .init = init,
405 : .fini = fini,
406 : .check = check,
407 : };
408 :
409 : #undef NAME
|