Line data Source code
1 : /* Topology support routines for the net tile */
2 :
3 : #define _GNU_SOURCE
4 : #include "fd_net_tile.h"
5 : #include "../topo/fd_topob.h"
6 : #include "../netlink/fd_netlink_tile.h"
7 : #include "../../app/shared/fd_config.h" /* FIXME layering violation */
8 : #include "../../util/pod/fd_pod_format.h"
9 : #include "fd_linux_bond.h"
10 :
11 : #include <errno.h>
12 : #include <net/if.h>
13 : #include <unistd.h>
14 :
15 : static void
16 : setup_xdp_tile( fd_topo_t * topo,
17 : ulong tile_kind_id,
18 : fd_topo_tile_t * netlink_tile,
19 : ulong const * tile_to_cpu,
20 : fd_config_net_t const * net_cfg,
21 : char const * if_phys,
22 : ulong if_queue,
23 0 : int xsk_core_dump ) {
24 0 : fd_topo_tile_t * tile = fd_topob_tile( topo, "net", "net", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0, 0 );
25 0 : fd_topob_link( topo, "net_netlnk", "net_netlnk", 128UL, 0UL, 0UL );
26 0 : fd_topob_tile_in( topo, "netlnk", 0UL, "metric_in", "net_netlnk", tile_kind_id, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
27 0 : fd_topob_tile_out( topo, "net", tile_kind_id, "net_netlnk", tile_kind_id );
28 0 : fd_netlink_topo_join( topo, netlink_tile, tile );
29 :
30 0 : fd_topo_obj_t * umem_obj = fd_topob_obj( topo, "dcache", "net_umem" );
31 0 : fd_topob_tile_uses( topo, tile, umem_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
32 0 : fd_pod_insertf_ulong( topo->props, umem_obj->id, "net.%lu.umem", tile_kind_id );
33 :
34 0 : FD_STATIC_ASSERT( sizeof(tile->xdp.if_virt)==IF_NAMESIZE, str_bounds );
35 0 : fd_cstr_ncpy( tile->xdp.if_virt, net_cfg->interface, IF_NAMESIZE );
36 0 : tile->net.bind_address = net_cfg->bind_address_parsed;
37 :
38 0 : FD_STATIC_ASSERT( sizeof(tile->xdp.if_phys)==IF_NAMESIZE, str_bounds );
39 0 : fd_cstr_ncpy( tile->xdp.if_phys, if_phys, IF_NAMESIZE );
40 0 : tile->xdp.if_queue = (uint)if_queue;
41 :
42 0 : tile->xdp.tx_flush_timeout_ns = (long)net_cfg->xdp.flush_timeout_micros * 1000L;
43 0 : tile->xdp.xdp_rx_queue_size = net_cfg->xdp.xdp_rx_queue_size;
44 0 : tile->xdp.xdp_tx_queue_size = net_cfg->xdp.xdp_tx_queue_size;
45 0 : tile->xdp.zero_copy = net_cfg->xdp.xdp_zero_copy;
46 0 : fd_cstr_ncpy( tile->xdp.xdp_mode, net_cfg->xdp.xdp_mode, sizeof(tile->xdp.xdp_mode) );
47 :
48 0 : fd_cstr_ncpy( tile->xdp.poll_mode, net_cfg->xdp.poll_mode, sizeof(tile->xdp.poll_mode) );
49 :
50 0 : tile->xdp.net.umem_dcache_obj_id = umem_obj->id;
51 0 : tile->xdp.netdev_tbl_obj_id = netlink_tile->netlink.netdev_tbl_obj_id;
52 0 : tile->xdp.fib4_main_obj_id = netlink_tile->netlink.fib4_main_obj_id;
53 0 : tile->xdp.fib4_local_obj_id = netlink_tile->netlink.fib4_local_obj_id;
54 0 : tile->xdp.neigh4_obj_id = netlink_tile->netlink.neigh4_obj_id;
55 :
56 0 : tile->xdp.xsk_core_dump = xsk_core_dump;
57 :
58 : /* Allocate free ring */
59 :
60 0 : tile->xdp.free_ring_depth = tile->xdp.xdp_tx_queue_size;
61 0 : if( tile_kind_id==0 ) {
62 : /* Allocate additional frames for loopback */
63 0 : tile->xdp.free_ring_depth += 16384UL;
64 0 : }
65 0 : }
66 :
67 : static void
68 : setup_sock_tile( fd_topo_t * topo,
69 : ulong const * tile_to_cpu,
70 0 : fd_config_net_t const * net_cfg ) {
71 0 : fd_topo_tile_t * tile = fd_topob_tile( topo, "sock", "sock", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0, 0 );
72 0 : tile->sock.net.bind_address = net_cfg->bind_address_parsed;
73 :
74 0 : if( FD_UNLIKELY( net_cfg->socket.receive_buffer_size>INT_MAX ) ) FD_LOG_ERR(( "invalid [net.socket.receive_buffer_size]" ));
75 0 : if( FD_UNLIKELY( net_cfg->socket.send_buffer_size >INT_MAX ) ) FD_LOG_ERR(( "invalid [net.socket.send_buffer_size]" ));
76 0 : tile->sock.so_rcvbuf = (int)net_cfg->socket.receive_buffer_size;
77 0 : tile->sock.so_sndbuf = (int)net_cfg->socket.send_buffer_size ;
78 0 : }
79 :
80 : void
81 : fd_topos_net_tiles( fd_topo_t * topo,
82 : ulong net_tile_cnt,
83 : fd_config_net_t const * net_cfg,
84 : ulong netlnk_max_routes,
85 : ulong netlnk_max_peer_routes,
86 : ulong netlnk_max_neighbors,
87 : int xsk_core_dump,
88 0 : ulong const tile_to_cpu[ FD_TILE_MAX ] ) {
89 : /* net_umem: Packet buffers */
90 0 : fd_topob_wksp( topo, "net_umem" );
91 :
92 : /* Create workspaces */
93 :
94 0 : if( 0==strcmp( net_cfg->provider, "xdp" ) ) {
95 :
96 : /* net: private working memory of the net tiles */
97 0 : fd_topob_wksp( topo, "net" );
98 : /* netlnk: private working memory of the netlnk tile */
99 0 : fd_topob_wksp( topo, "netlnk" );
100 : /* netbase: shared network config (config plane) */
101 0 : fd_topob_wksp( topo, "netbase" );
102 : /* net_netlnk: net->netlnk ARP requests */
103 0 : fd_topob_wksp( topo, "net_netlnk" );
104 :
105 0 : fd_topo_tile_t * netlink_tile = fd_topob_tile( topo, "netlnk", "netlnk", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0, 0 );
106 0 : fd_netlink_topo_create( netlink_tile, topo, netlnk_max_routes, netlnk_max_peer_routes, netlnk_max_neighbors, net_cfg->interface );
107 :
108 : /* Enumerate network devices to attach to */
109 0 : uint devices[ FD_NET_BOND_SLAVE_MAX ] = {0};
110 0 : uint device_cnt = 1U;
111 0 : if( net_cfg->xdp.native_bond && fd_bonding_is_master( net_cfg->interface ) ) {
112 0 : fd_bonding_slave_iter_t iter_[1];
113 0 : fd_bonding_slave_iter_t * iter = fd_bonding_slave_iter_init( iter_, net_cfg->interface );
114 0 : uint slave_cnt;
115 0 : for( slave_cnt=0U;
116 0 : /* */ !fd_bonding_slave_iter_done( iter );
117 0 : slave_cnt++, fd_bonding_slave_iter_next( iter ) ) {
118 0 : if( FD_UNLIKELY( slave_cnt>=FD_NET_BOND_SLAVE_MAX ) ) {
119 0 : FD_LOG_ERR(( "bond interface %s has too many slave devices; max is %u (see [net.xdp.native_bond])",
120 0 : net_cfg->interface, FD_NET_BOND_SLAVE_MAX ));
121 0 : }
122 0 : uint if_idx = if_nametoindex( fd_bonding_slave_iter_ele( iter ) );
123 0 : if( FD_UNLIKELY( !if_idx ) ) FD_LOG_ERR(( "if_nametoindex(%s) failed", fd_bonding_slave_iter_ele( iter ) ));
124 0 : devices[ slave_cnt ] = if_idx;
125 0 : }
126 0 : if( slave_cnt==0 ) {
127 0 : FD_LOG_ERR(( "no bond slave devices detected on interface %s (see [net.xdp.native_bond])", net_cfg->interface ));
128 0 : }
129 0 : device_cnt = (uint)slave_cnt;
130 0 : } else {
131 0 : devices[ 0 ] = if_nametoindex( net_cfg->interface );
132 0 : if( FD_UNLIKELY( !devices[ 0 ] ) ) FD_LOG_ERR(( "unsupported [net.interface]: `%s`", net_cfg->interface ));
133 0 : device_cnt = 1U;
134 0 : }
135 :
136 : /* Verify that net_tile_cnt is a multiple of device_cnt */
137 0 : if( FD_UNLIKELY( net_tile_cnt%device_cnt!=0 ) ) {
138 0 : FD_LOG_ERR(( "net tile count %lu must be a multiple of the number of slave devices %u (incompatible settings [layout.net_tile_count] and [net.xdp.native_bond])", net_tile_cnt, device_cnt ));
139 0 : }
140 0 : uint dev_queue_cnt = (uint)net_tile_cnt/device_cnt;
141 :
142 : /* Assign XDP tiles to device queues */
143 0 : ulong tile_kind_id = 0UL;
144 0 : for( uint i=0UL; i<device_cnt; i++ ) {
145 0 : char if_name[ IF_NAMESIZE ];
146 0 : if( FD_UNLIKELY( !if_indextoname( devices[ i ], if_name ) ) ) {
147 0 : FD_LOG_ERR(( "error initializing network stack: if_indextoname(%u) failed (try disabling [net.xdp.native_bond]?)", i ));
148 0 : }
149 0 : for( ulong j=0UL; j<dev_queue_cnt; j++ ) {
150 0 : setup_xdp_tile( topo, tile_kind_id++, netlink_tile, tile_to_cpu, net_cfg, if_name, (uint)j, xsk_core_dump );
151 0 : }
152 0 : }
153 0 : FD_TEST( tile_kind_id==net_tile_cnt );
154 :
155 0 : } else if( 0==strcmp( net_cfg->provider, "socket" ) ) {
156 :
157 : /* sock: private working memory of the sock tiles */
158 0 : fd_topob_wksp( topo, "sock" );
159 :
160 0 : for( ulong i=0UL; i<net_tile_cnt; i++ ) {
161 0 : setup_sock_tile( topo, tile_to_cpu, net_cfg );
162 0 : }
163 :
164 0 : } else {
165 0 : FD_LOG_ERR(( "invalid `net.provider`" ));
166 0 : }
167 0 : }
168 :
169 : static int
170 0 : topo_is_xdp( fd_topo_t * topo ) {
171 : /* FIXME hacky */
172 0 : for( ulong j=0UL; j<(topo->tile_cnt); j++ ) {
173 0 : if( 0==strcmp( topo->tiles[ j ].name, "net" ) ) {
174 0 : return 1;
175 0 : }
176 0 : }
177 0 : return 0;
178 0 : }
179 :
180 : static void
181 : add_xdp_rx_link( fd_topo_t * topo,
182 : char const * link_name,
183 : ulong net_kind_id,
184 0 : ulong depth ) {
185 0 : if( FD_UNLIKELY( !topo || !link_name ) ) FD_LOG_ERR(( "NULL args" ));
186 0 : if( FD_UNLIKELY( strlen( link_name )>=sizeof(topo->links[ topo->link_cnt ].name ) ) ) FD_LOG_ERR(( "link name too long: %s", link_name ));
187 0 : if( FD_UNLIKELY( topo->link_cnt>=FD_TOPO_MAX_LINKS ) ) FD_LOG_ERR(( "too many links" ));
188 :
189 0 : ulong kind_id = 0UL;
190 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
191 0 : if( !strcmp( topo->links[ i ].name, link_name ) ) kind_id++;
192 0 : }
193 :
194 0 : fd_topo_link_t * link = &topo->links[ topo->link_cnt ];
195 0 : strncpy( link->name, link_name, sizeof(link->name) );
196 0 : link->id = topo->link_cnt;
197 0 : link->kind_id = kind_id;
198 0 : link->depth = depth;
199 0 : link->mtu = FD_NET_MTU;
200 0 : link->burst = 0UL;
201 :
202 0 : fd_topo_obj_t * obj = fd_topob_obj( topo, "mcache", "net_umem" );
203 0 : link->mcache_obj_id = obj->id;
204 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
205 :
206 0 : link->dcache_obj_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "net.%lu.umem", net_kind_id );
207 0 : if( FD_UNLIKELY( link->dcache_obj_id==ULONG_MAX ) ) FD_LOG_ERR(( "umem dcache not found for net %lu", net_kind_id ));
208 :
209 0 : topo->link_cnt++;
210 0 : }
211 :
212 : void
213 : fd_topos_net_rx_link( fd_topo_t * topo,
214 : char const * link_name,
215 : ulong net_kind_id,
216 0 : ulong depth ) {
217 0 : if( topo_is_xdp( topo ) ) {
218 0 : add_xdp_rx_link( topo, link_name, net_kind_id, depth );
219 0 : fd_topob_tile_out( topo, "net", net_kind_id, link_name, net_kind_id );
220 0 : } else {
221 0 : fd_topob_link( topo, link_name, "net_umem", depth, FD_NET_MTU, 64 );
222 0 : fd_topob_tile_out( topo, "sock", net_kind_id, link_name, net_kind_id );
223 0 : }
224 0 : }
225 :
226 : void
227 : fd_topos_tile_in_net( fd_topo_t * topo,
228 : char const * fseq_wksp,
229 : char const * link_name,
230 : ulong link_kind_id,
231 : int reliable,
232 0 : int polled ) {
233 0 : for( ulong j=0UL; j<(topo->tile_cnt); j++ ) {
234 0 : if( 0==strcmp( topo->tiles[ j ].name, "net" ) ||
235 0 : 0==strcmp( topo->tiles[ j ].name, "sock" ) ) {
236 0 : fd_topob_tile_in( topo, topo->tiles[ j ].name, topo->tiles[ j ].kind_id, fseq_wksp, link_name, link_kind_id, reliable, polled );
237 0 : }
238 0 : }
239 0 : }
240 :
241 : void
242 : fd_topos_net_tile_finish( fd_topo_t * topo,
243 0 : ulong net_kind_id ) {
244 0 : if( !topo_is_xdp( topo ) ) return;
245 :
246 0 : fd_topo_tile_t * net_tile = &topo->tiles[ fd_topo_find_tile( topo, "net", net_kind_id ) ];
247 :
248 0 : ulong rx_depth = net_tile->xdp.xdp_rx_queue_size;
249 0 : ulong tx_depth = net_tile->xdp.xdp_tx_queue_size;
250 0 : rx_depth += (rx_depth/2UL);
251 0 : tx_depth += (tx_depth/2UL);
252 :
253 0 : if( net_kind_id==0 ) {
254 : /* Double it for loopback XSK */
255 0 : rx_depth *= 2UL;
256 0 : tx_depth *= 2UL;
257 0 : }
258 :
259 0 : ulong cum_frame_cnt = rx_depth + tx_depth;
260 :
261 : /* Count up the depth of all RX mcaches */
262 :
263 0 : for( ulong j=0UL; j<(net_tile->out_cnt); j++ ) {
264 0 : ulong link_id = net_tile->out_link_id[ j ];
265 0 : ulong mcache_obj_id = topo->links[ link_id ].mcache_obj_id;
266 0 : ulong depth = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "obj.%lu.depth", mcache_obj_id );
267 0 : if( FD_UNLIKELY( depth==ULONG_MAX ) ) FD_LOG_ERR(( "Didn't find depth for mcache %s", topo->links[ link_id ].name ));
268 0 : cum_frame_cnt += depth + 1UL;
269 0 : }
270 :
271 : /* Create a dcache object */
272 :
273 0 : ulong umem_obj_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "net.%lu.umem", net_kind_id );
274 0 : FD_TEST( umem_obj_id!=ULONG_MAX );
275 :
276 0 : FD_TEST( net_tile->net.umem_dcache_obj_id > 0 );
277 0 : fd_pod_insertf_ulong( topo->props, cum_frame_cnt, "obj.%lu.depth", umem_obj_id );
278 0 : fd_pod_insertf_ulong( topo->props, 2UL, "obj.%lu.burst", umem_obj_id ); /* 4096 byte padding */
279 0 : fd_pod_insertf_ulong( topo->props, 2048UL, "obj.%lu.mtu", umem_obj_id );
280 0 : }
281 :
282 : void
283 : fd_topo_install_xdp( fd_topo_t const * topo,
284 : fd_xdp_fds_t * fds,
285 : uint * fds_cnt,
286 : uint bind_addr,
287 0 : int dry_run ) {
288 0 : uint fds_max = *fds_cnt;
289 0 : memset( fds, 0, fds_max*sizeof(fd_xdp_fds_t) );
290 :
291 0 : uint if_cnt = 0U;
292 0 : # define ADD_IF_IDX( idx_ ) do { \
293 0 : uint idx = (idx_); \
294 0 : int found = 0; \
295 0 : for( uint i=0U; i<if_cnt; i++ ) { \
296 0 : if( fds[ i ].if_idx==idx ) { \
297 0 : found = 1; \
298 0 : break; \
299 0 : } \
300 0 : } \
301 0 : if( !found ) { \
302 0 : FD_TEST( if_cnt<FD_NET_BOND_SLAVE_MAX+1 ); \
303 0 : fds[ if_cnt++ ].if_idx = idx; \
304 0 : } \
305 0 : } while(0)
306 :
307 : /* Create a list of unique fds */
308 :
309 0 : ulong net_tile_cnt = fd_topo_tile_name_cnt( topo, "net" );
310 0 : for( ulong tile_kind_id=0UL; tile_kind_id<net_tile_cnt; tile_kind_id++ ) {
311 0 : ulong net_tile_id = fd_topo_find_tile( topo, "net", tile_kind_id );
312 0 : FD_TEST( net_tile_id!=ULONG_MAX );
313 0 : fd_topo_tile_t const * tile = &topo->tiles[ net_tile_id ];
314 0 : uint if_idx = if_nametoindex( tile->xdp.if_phys ); FD_TEST( if_idx );
315 0 : ADD_IF_IDX( if_idx );
316 0 : }
317 :
318 : /* Add loopback unless found */
319 :
320 0 : uint lo_idx = if_nametoindex( "lo" ); FD_TEST( lo_idx );
321 0 : ADD_IF_IDX( lo_idx );
322 :
323 : /* Done with config discovery */
324 :
325 0 : *fds_cnt = if_cnt;
326 0 : int next_fd = 123462;
327 0 : for( uint i=0U; i<if_cnt; i++ ) {
328 0 : fds[ i ].xsk_map_fd = next_fd++;
329 0 : fds[ i ].prog_link_fd = next_fd++;
330 0 : }
331 0 : if( dry_run ) return;
332 :
333 : /* Install */
334 :
335 0 : ulong net0_tile_idx = fd_topo_find_tile( topo, "net", 0UL );
336 0 : FD_TEST( net0_tile_idx!=ULONG_MAX );
337 0 : fd_topo_tile_t const * net0_tile = &topo->tiles[ net0_tile_idx ];
338 :
339 0 : ushort udp_port_candidates[] = {
340 0 : (ushort)net0_tile->xdp.net.legacy_transaction_listen_port,
341 0 : (ushort)net0_tile->xdp.net.quic_transaction_listen_port,
342 0 : (ushort)net0_tile->xdp.net.shred_listen_port,
343 0 : (ushort)net0_tile->xdp.net.gossip_listen_port,
344 0 : (ushort)net0_tile->xdp.net.repair_client_listen_port,
345 0 : (ushort)net0_tile->xdp.net.repair_serve_listen_port,
346 0 : (ushort)net0_tile->xdp.net.txsend_src_port,
347 0 : };
348 :
349 0 : for( uint i=0U; i<if_cnt; i++ ) {
350 : /* Override XDP mode for loopback */
351 0 : char const * xdp_mode = net0_tile->xdp.xdp_mode;
352 0 : if( fds[ i ].if_idx==1U ) xdp_mode = "skb";
353 :
354 0 : fd_xdp_fds_t xdp_fds = fd_xdp_install(
355 0 : fds[ i ].if_idx,
356 0 : bind_addr,
357 0 : sizeof(udp_port_candidates)/sizeof(udp_port_candidates[0]),
358 0 : udp_port_candidates,
359 0 : xdp_mode );
360 0 : if( FD_UNLIKELY( -1==dup2( xdp_fds.xsk_map_fd, fds[ i ].xsk_map_fd ) ) ) {
361 0 : FD_LOG_ERR(( "dup2() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
362 0 : }
363 0 : if( FD_UNLIKELY( -1==close( xdp_fds.xsk_map_fd ) ) ) {
364 0 : FD_LOG_ERR(( "close() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
365 0 : }
366 0 : if( FD_UNLIKELY( -1==dup2( xdp_fds.prog_link_fd, fds[ i ].prog_link_fd ) ) ) {
367 0 : FD_LOG_ERR(( "dup2() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
368 0 : }
369 0 : if( FD_UNLIKELY( -1==close( xdp_fds.prog_link_fd ) ) ) {
370 0 : FD_LOG_ERR(( "close() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
371 0 : }
372 0 : }
373 :
374 0 : # undef ADD_IF_IDX
375 0 : }
|