Line data Source code
1 : #include "fd_netlink_tile_private.h"
2 : #include "../topo/fd_topo.h"
3 : #include "../topo/fd_topob.h"
4 : #include "../metrics/fd_metrics.h"
5 : #include "../../waltz/ip/fd_fib4_netlink.h"
6 : #include "../../waltz/mib/fd_netdev_netlink.h"
7 : #include "../../waltz/neigh/fd_neigh4_netlink.h"
8 : #include "../../util/pod/fd_pod_format.h"
9 : #include "../../util/log/fd_dtrace.h"
10 : #include "fd_netlink_tile.h"
11 :
12 : #include <errno.h>
13 : #include <net/if.h>
14 : #include <netinet/in.h> /* MSG_DONTWAIT */
15 : #include <sys/socket.h> /* SOL_{...} */
16 : #include <sys/random.h> /* getrandom */
17 : #include <sys/time.h> /* struct timeval */
18 : #include <linux/rtnetlink.h> /* RTM_{...} */
19 :
20 : #define FD_SOCKADDR_IN_SZ sizeof(struct sockaddr_in)
21 : #include "generated/netlink_seccomp.h"
22 :
23 : void
24 : fd_netlink_topo_create( fd_topo_tile_t * netlink_tile,
25 : fd_topo_t * topo,
26 : ulong netlnk_max_routes,
27 : ulong netlnk_max_peer_routes,
28 : ulong netlnk_max_neighbors,
29 0 : char const * bind_interface ) {
30 0 : fd_topo_obj_t * netdev_dbl_buf_obj = fd_topob_obj( topo, "dbl_buf", "netbase" );
31 0 : fd_topo_obj_t * fib4_main_obj = fd_topob_obj( topo, "fib4", "netbase" );
32 0 : fd_topo_obj_t * fib4_local_obj = fd_topob_obj( topo, "fib4", "netbase" );
33 0 : fd_topo_obj_t * neigh4_obj = fd_topob_obj( topo, "neigh4_hmap", "netbase" );
34 0 : fd_topo_obj_t * neigh4_ele_obj = fd_topob_obj( topo, "opaque", "netbase" );
35 :
36 0 : fd_topob_tile_uses( topo, netlink_tile, netdev_dbl_buf_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
37 0 : fd_topob_tile_uses( topo, netlink_tile, fib4_main_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
38 0 : fd_topob_tile_uses( topo, netlink_tile, fib4_local_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
39 0 : fd_topob_tile_uses( topo, netlink_tile, neigh4_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
40 0 : fd_topob_tile_uses( topo, netlink_tile, neigh4_ele_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
41 :
42 : /* Configure double buffer of netdev table */
43 0 : ulong const netdev_dbl_buf_mtu = fd_netdev_tbl_footprint( NETDEV_MAX, BOND_MASTER_MAX );
44 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, netdev_dbl_buf_mtu, "obj.%lu.mtu", netdev_dbl_buf_obj->id ) );
45 :
46 : /* Configure route table */
47 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, netlnk_max_routes, "obj.%lu.route_max", fib4_main_obj->id ) );
48 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, netlnk_max_routes, "obj.%lu.route_max", fib4_local_obj->id ) );
49 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, netlnk_max_peer_routes, "obj.%lu.route_peer_max", fib4_main_obj->id ) );
50 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, netlnk_max_peer_routes, "obj.%lu.route_peer_max", fib4_local_obj->id ) );
51 0 : ulong fib4_seed;
52 0 : FD_TEST( 8UL==getrandom( &fib4_seed, sizeof(ulong), 0 ) );
53 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, fib4_seed, "obj.%lu.route_peer_seed", fib4_local_obj->id ) );
54 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, fib4_seed, "obj.%lu.route_peer_seed", fib4_main_obj->id ) );
55 :
56 : /* Configure neighbor hashmap: Open addressed hashmap with 3.0 sparsity
57 : factor and 16 long probe chain */
58 0 : ulong const neigh_ele_max = fd_ulong_pow2_up( 3UL * netlnk_max_neighbors );
59 0 : ulong const neigh_ele_align = alignof(fd_neigh4_entry_t);
60 0 : ulong const neigh_ele_fp = neigh_ele_max * sizeof(fd_neigh4_entry_t);
61 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, neigh_ele_max, "obj.%lu.ele_max", neigh4_obj->id ) );
62 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, 16UL, "obj.%lu.probe_max", neigh4_obj->id ) );
63 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, 4UL, "obj.%lu.lock_cnt", neigh4_obj->id ) );
64 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, neigh_ele_align, "obj.%lu.align", neigh4_ele_obj->id ) );
65 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, neigh_ele_fp, "obj.%lu.footprint", neigh4_ele_obj->id ) );
66 :
67 : /* Pick a random hashmap seed */
68 0 : ulong neigh4_seed;
69 0 : FD_TEST( 8UL==getrandom( &neigh4_seed, sizeof(ulong), 0 ) );
70 0 : FD_TEST( fd_pod_insertf_ulong( topo->props, neigh4_seed, "obj.%lu.seed", neigh4_obj->id ) );
71 :
72 0 : netlink_tile->netlink.netdev_dbl_buf_obj_id = netdev_dbl_buf_obj->id;
73 0 : netlink_tile->netlink.fib4_main_obj_id = fib4_main_obj->id;
74 0 : netlink_tile->netlink.fib4_local_obj_id = fib4_local_obj->id;
75 0 : memcpy( netlink_tile->netlink.neigh_if, bind_interface, sizeof(netlink_tile->netlink.neigh_if) );
76 0 : netlink_tile->netlink.neigh4_obj_id = neigh4_obj->id;
77 0 : netlink_tile->netlink.neigh4_ele_obj_id = neigh4_ele_obj->id;
78 0 : }
79 :
80 : void
81 : fd_netlink_topo_join( fd_topo_t * topo,
82 : fd_topo_tile_t * netlink_tile,
83 0 : fd_topo_tile_t * join_tile ) {
84 0 : fd_topob_tile_uses( topo, join_tile, &topo->objs[ netlink_tile->netlink.neigh4_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
85 0 : fd_topob_tile_uses( topo, join_tile, &topo->objs[ netlink_tile->netlink.neigh4_ele_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
86 0 : fd_topob_tile_uses( topo, join_tile, &topo->objs[ netlink_tile->netlink.fib4_main_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
87 0 : fd_topob_tile_uses( topo, join_tile, &topo->objs[ netlink_tile->netlink.fib4_local_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
88 0 : }
89 :
90 : /* Begin tile methods */
91 :
92 : FD_FN_CONST static inline ulong
93 0 : scratch_align( void ) {
94 0 : return fd_ulong_max( alignof(fd_netlink_tile_ctx_t), FD_NETDEV_TBL_ALIGN );
95 0 : }
96 :
97 : FD_FN_PURE static inline ulong
98 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
99 0 : (void)tile;
100 0 : ulong l = FD_LAYOUT_INIT;
101 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_netlink_tile_ctx_t), sizeof(fd_netlink_tile_ctx_t) );
102 0 : l = FD_LAYOUT_APPEND( l, fd_netdev_tbl_align(), fd_netdev_tbl_footprint( NETDEV_MAX, BOND_MASTER_MAX ) );
103 0 : return FD_LAYOUT_FINI( l, scratch_align() );
104 0 : }
105 :
106 : static ulong
107 : populate_allowed_seccomp( fd_topo_t const * topo,
108 : fd_topo_tile_t const * tile,
109 : ulong out_cnt,
110 0 : struct sock_filter * out ) {
111 0 : fd_netlink_tile_ctx_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id );
112 0 : FD_TEST( ctx->magic==FD_NETLINK_TILE_CTX_MAGIC );
113 0 : populate_sock_filter_policy_netlink( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->nl_monitor->fd, (uint)ctx->nl_req->fd, (uint)ctx->prober->sock_fd );
114 0 : return sock_filter_policy_netlink_instr_cnt;
115 0 : }
116 :
117 : static ulong
118 : populate_allowed_fds( fd_topo_t const * topo,
119 : fd_topo_tile_t const * tile,
120 : ulong out_fds_cnt,
121 0 : int * out_fds ) {
122 0 : fd_netlink_tile_ctx_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id );
123 0 : FD_TEST( ctx->magic==FD_NETLINK_TILE_CTX_MAGIC );
124 :
125 0 : if( FD_UNLIKELY( out_fds_cnt<5UL ) ) FD_LOG_ERR(( "out_fds_cnt too low (%lu)", out_fds_cnt ));
126 :
127 0 : ulong out_cnt = 0UL;
128 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
129 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
130 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
131 0 : out_fds[ out_cnt++ ] = ctx->nl_monitor->fd;
132 0 : out_fds[ out_cnt++ ] = ctx->nl_req->fd;
133 0 : out_fds[ out_cnt++ ] = ctx->prober->sock_fd;
134 0 : return out_cnt;
135 0 : }
136 :
137 : static void
138 : privileged_init( fd_topo_t * topo,
139 0 : fd_topo_tile_t * tile ) {
140 0 : if( FD_UNLIKELY( tile->kind_id!=0 ) ) {
141 0 : FD_LOG_ERR(( "Topology contains more than one netlink tile" ));
142 0 : }
143 :
144 0 : uint const neigh_if_idx = if_nametoindex( tile->netlink.neigh_if );
145 0 : if( FD_UNLIKELY( !neigh_if_idx ) ) FD_LOG_ERR(( "if_nametoindex(%.16s) failed (%i-%s)", tile->netlink.neigh_if, errno, fd_io_strerror( errno ) ));
146 :
147 0 : fd_netlink_tile_ctx_t * ctx = fd_topo_obj_laddr( topo, tile->tile_obj_id );
148 0 : fd_memset( ctx, 0, sizeof(fd_netlink_tile_ctx_t) );
149 0 : ctx->magic = FD_NETLINK_TILE_CTX_MAGIC;
150 0 : ctx->neigh4_ifidx = neigh_if_idx;
151 :
152 0 : if( FD_UNLIKELY( !fd_netlink_init( ctx->nl_monitor, 1000U ) ) ) {
153 0 : FD_LOG_ERR(( "Failed to connect to rtnetlink" ));
154 0 : }
155 0 : if( FD_UNLIKELY( !fd_netlink_init( ctx->nl_req, 9000000U ) ) ) {
156 0 : FD_LOG_ERR(( "Failed to connect to rtnetlink" ));
157 0 : }
158 :
159 0 : union {
160 0 : struct sockaddr sa;
161 0 : struct sockaddr_nl sanl;
162 0 : } sa;
163 0 : sa.sanl = (struct sockaddr_nl) {
164 0 : .nl_family = AF_NETLINK,
165 0 : .nl_groups = RTMGRP_LINK | RTMGRP_NEIGH | RTMGRP_IPV4_ROUTE
166 0 : };
167 0 : if( FD_UNLIKELY( 0!=bind( ctx->nl_monitor->fd, &sa.sa, sizeof(struct sockaddr_nl) ) ) ) {
168 0 : FD_LOG_ERR(( "bind(sock,RT_NETLINK,RTMGRP_{LINK,NEIGH,IPV4_ROUTE}) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
169 0 : }
170 :
171 0 : float const max_probes_per_second = 3.f;
172 0 : ulong const max_probe_burst = 128UL;
173 0 : float const probe_delay_seconds = 15.f;
174 0 : fd_neigh4_prober_init( ctx->prober, max_probes_per_second, max_probe_burst, probe_delay_seconds );
175 :
176 : /* Set duration of blocking reads in before_credit */
177 0 : struct timeval tv = { .tv_usec = 2000 }; /* 2ms */
178 0 : if( FD_UNLIKELY( 0!=setsockopt( ctx->nl_monitor->fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval) ) ) ) {
179 0 : FD_LOG_ERR(( "setsockopt(sock,SOL_SOCKET,SO_RCVTIMEO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
180 0 : }
181 0 : }
182 :
183 : static void
184 : unprivileged_init( fd_topo_t * topo,
185 0 : fd_topo_tile_t * tile ) {
186 0 : FD_SCRATCH_ALLOC_INIT( l, fd_topo_obj_laddr( topo, tile->tile_obj_id ) );
187 0 : fd_netlink_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_netlink_tile_ctx_t), sizeof(fd_netlink_tile_ctx_t) );
188 0 : FD_TEST( ctx->magic==FD_NETLINK_TILE_CTX_MAGIC );
189 0 : ctx->netdev_sz = fd_netdev_tbl_footprint( NETDEV_MAX, BOND_MASTER_MAX );
190 0 : ctx->netdev_local = FD_SCRATCH_ALLOC_APPEND( l, fd_netdev_tbl_align(), ctx->netdev_sz );
191 :
192 0 : FD_TEST( tile->netlink.netdev_dbl_buf_obj_id );
193 0 : FD_TEST( tile->netlink.neigh4_obj_id );
194 0 : FD_TEST( tile->netlink.neigh4_ele_obj_id );
195 0 : FD_TEST( tile->netlink.fib4_local_obj_id );
196 0 : FD_TEST( tile->netlink.fib4_main_obj_id );
197 :
198 0 : FD_TEST( fd_netdev_tbl_new( ctx->netdev_local, NETDEV_MAX, BOND_MASTER_MAX ) );
199 0 : FD_TEST( fd_netdev_tbl_join( ctx->netdev_tbl, ctx->netdev_local ) );
200 :
201 0 : FD_TEST( ctx->netdev_buf = fd_dbl_buf_join( fd_topo_obj_laddr( topo, tile->netlink.netdev_dbl_buf_obj_id ) ) );
202 :
203 0 : FD_TEST( fd_neigh4_hmap_join( ctx->neigh4, fd_topo_obj_laddr( topo, tile->netlink.neigh4_obj_id ), fd_topo_obj_laddr( topo, tile->netlink.neigh4_ele_obj_id ) ) );
204 0 : ctx->fib4_local = fd_fib4_join( fd_topo_obj_laddr( topo, tile->netlink.fib4_local_obj_id ) ); FD_TEST( ctx->fib4_local );
205 0 : ctx->fib4_main = fd_fib4_join( fd_topo_obj_laddr( topo, tile->netlink.fib4_main_obj_id ) ); FD_TEST( ctx->fib4_main );
206 :
207 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
208 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
209 0 : if( FD_UNLIKELY( link->mtu!=0UL ) ) FD_LOG_ERR(( "netlink solicit links must have an MTU of zero" ));
210 0 : }
211 :
212 0 : ctx->action |= FD_NET_TILE_ACTION_LINK_UPDATE;
213 0 : ctx->action |= FD_NET_TILE_ACTION_ROUTE4_UPDATE;
214 0 : ctx->action |= FD_NET_TILE_ACTION_NEIGH_UPDATE;
215 :
216 0 : ctx->update_backoff = (long)( fd_tempo_tick_per_ns( NULL ) * 10e6 ); /* 10ms */
217 0 : }
218 :
219 : /* Begin stem methods
220 :
221 : Note: Using stem here might seem odd since fd_netlink_tile does not
222 : send or receive any messages. Use of stem here is justified because of
223 : the initialization, generic metrics, and event loop functionality it
224 : provides. */
225 :
226 : static inline void
227 0 : metrics_write( fd_netlink_tile_ctx_t * ctx ) {
228 0 : FD_MCNT_SET( NETLNK, DROP_EVENTS, fd_netlink_enobufs_cnt );
229 0 : FD_MCNT_SET( NETLNK, LINK_FULL_SYNCS, ctx->metrics.link_full_syncs );
230 0 : FD_MCNT_SET( NETLNK, ROUTE_FULL_SYNCS, ctx->metrics.route_full_syncs );
231 0 : FD_MCNT_ENUM_COPY( NETLNK, UPDATES, ctx->metrics.update_cnt );
232 0 : FD_MGAUGE_SET( NETLNK, INTERFACE_COUNT, ctx->netdev_tbl->hdr->dev_cnt );
233 0 : FD_MGAUGE_SET( NETLNK, ROUTE_COUNT_LOCAL, fd_fib4_cnt( ctx->fib4_local ) );
234 0 : FD_MGAUGE_SET( NETLNK, ROUTE_COUNT_MAIN, fd_fib4_cnt( ctx->fib4_main ) );
235 0 : FD_MCNT_SET( NETLNK, NEIGH_PROBE_SENT, ctx->metrics.neigh_solicits_sent );
236 0 : FD_MCNT_SET( NETLNK, NEIGH_PROBE_FAILS, ctx->metrics.neigh_solicits_fails );
237 0 : FD_MCNT_SET( NETLNK, NEIGH_PROBE_RATE_LIMIT_HOST, ctx->prober->local_rate_limited_cnt );
238 0 : FD_MCNT_SET( NETLNK, NEIGH_PROBE_RATE_LIMIT_GLOBAL, ctx->prober->global_rate_limited_cnt );
239 0 : }
240 :
241 : /* netlink_monitor_read calls recvfrom to process a link, route, or
242 : neighbor update. Returns 1 if a message was read, 0 otherwise. */
243 :
244 : static int
245 : netlink_monitor_read( fd_netlink_tile_ctx_t * ctx,
246 0 : int flags ) {
247 :
248 0 : uchar msg[ 16384 ];
249 0 : long msg_sz = recvfrom( ctx->nl_monitor->fd, msg, sizeof(msg), flags, NULL, NULL );
250 0 : if( msg_sz<=0L ) {
251 0 : if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) return 0;
252 0 : if( errno==ENOBUFS ) {
253 0 : fd_netlink_enobufs_cnt++;
254 0 : return 0;
255 0 : }
256 0 : FD_LOG_ERR(( "recvfrom(nl_monitor) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
257 0 : }
258 :
259 0 : struct nlmsghdr * nlh = fd_type_pun( msg );
260 0 : FD_DTRACE_PROBE_4( netlink_update, nlh->nlmsg_seq, nlh->nlmsg_type, nlh->nlmsg_len, nlh->nlmsg_flags );
261 0 : switch( nlh->nlmsg_type ) {
262 0 : case RTM_NEWLINK:
263 0 : case RTM_DELLINK:
264 0 : ctx->action |= FD_NET_TILE_ACTION_LINK_UPDATE;
265 0 : ctx->metrics.update_cnt[ FD_METRICS_ENUM_NETLINK_MSG_V_LINK_IDX ]++;
266 0 : break;
267 0 : case RTM_NEWROUTE:
268 0 : case RTM_DELROUTE:
269 0 : ctx->action |= FD_NET_TILE_ACTION_ROUTE4_UPDATE;
270 0 : ctx->metrics.update_cnt[ FD_METRICS_ENUM_NETLINK_MSG_V_IPV4_ROUTE_IDX ]++;
271 0 : break;
272 0 : case RTM_NEWNEIGH:
273 0 : case RTM_DELNEIGH: {
274 0 : fd_neigh4_netlink_ingest_message( ctx->neigh4, nlh, ctx->neigh4_ifidx );
275 0 : ctx->metrics.update_cnt[ FD_METRICS_ENUM_NETLINK_MSG_V_NEIGH_IDX ]++;
276 0 : break;
277 0 : }
278 0 : default:
279 0 : FD_LOG_INFO(( "Received unexpected netlink message type %u", nlh->nlmsg_type ));
280 0 : break;
281 0 : }
282 :
283 0 : return 1;
284 0 : }
285 :
286 : static void
287 0 : during_housekeeping( fd_netlink_tile_ctx_t * ctx ) {
288 0 : long now = fd_tickcount();
289 0 : if( ctx->action & FD_NET_TILE_ACTION_LINK_UPDATE ) {
290 0 : if( now < ctx->link_update_ts ) return;
291 0 : ctx->action &= ~FD_NET_TILE_ACTION_LINK_UPDATE;
292 0 : fd_netdev_netlink_load_table( ctx->netdev_tbl, ctx->nl_req );
293 0 : fd_dbl_buf_insert( ctx->netdev_buf, ctx->netdev_local, ctx->netdev_sz );
294 0 : ctx->link_update_ts = now+ctx->update_backoff;
295 0 : ctx->metrics.link_full_syncs++;
296 0 : }
297 0 : if( ctx->action & FD_NET_TILE_ACTION_ROUTE4_UPDATE ) {
298 0 : if( now < ctx->route4_update_ts ) return;
299 0 : ctx->action &= ~FD_NET_TILE_ACTION_ROUTE4_UPDATE;
300 0 : fd_fib4_netlink_load_table( ctx->fib4_local, ctx->nl_req, RT_TABLE_LOCAL );
301 0 : fd_fib4_netlink_load_table( ctx->fib4_main, ctx->nl_req, RT_TABLE_MAIN );
302 0 : ctx->route4_update_ts = now+ctx->update_backoff;
303 0 : ctx->metrics.route_full_syncs++;
304 0 : }
305 0 : if( ctx->action & FD_NET_TILE_ACTION_NEIGH_UPDATE ) {
306 0 : ctx->action &= ~FD_NET_TILE_ACTION_NEIGH_UPDATE;
307 0 : fd_neigh4_netlink_request_dump( ctx->nl_req, ctx->neigh4_ifidx );
308 0 : uchar buf[ 4096 ];
309 0 : fd_netlink_iter_t iter[1];
310 0 : for( fd_netlink_iter_init( iter, ctx->nl_req, buf, sizeof(buf) );
311 0 : !fd_netlink_iter_done( iter );
312 0 : fd_netlink_iter_next( iter, ctx->nl_req ) ) {
313 0 : fd_neigh4_netlink_ingest_message( ctx->neigh4, fd_netlink_iter_msg( iter ), ctx->neigh4_ifidx );
314 0 : }
315 0 : }
316 0 : }
317 :
318 : /* before_credit is called once per loop iteration */
319 :
320 : static void
321 : before_credit( fd_netlink_tile_ctx_t * ctx,
322 : fd_stem_context_t * stem FD_PARAM_UNUSED,
323 0 : int * charge_busy ) {
324 :
325 0 : for(;;) {
326 : /* Clear socket buffer */
327 0 : if( !netlink_monitor_read( ctx, MSG_DONTWAIT ) ) break;
328 0 : *charge_busy = 1;
329 0 : }
330 :
331 0 : ctx->idle_cnt++;
332 0 : if( FD_UNLIKELY( ctx->idle_cnt >= 128L ) ) {
333 : /* Blocking read (yield to scheduler) */
334 0 : *charge_busy = 0;
335 0 : netlink_monitor_read( ctx, 0 );
336 0 : }
337 :
338 0 : }
339 :
340 : /* after_poll_overrun is called when fd_stem.c was overrun while
341 : checking for new fragments. This typically happens when
342 : before_credit takes too long (e.g. we were in a blocking netlink
343 : read) */
344 :
345 : static void
346 0 : after_poll_overrun( fd_netlink_tile_ctx_t * ctx ) {
347 0 : ctx->idle_cnt = -1L;
348 0 : }
349 :
350 : /* after_frag handles a neighbor solicit request */
351 :
352 : static void
353 : after_frag( fd_netlink_tile_ctx_t * ctx,
354 : ulong in_idx,
355 : ulong seq,
356 : ulong sig,
357 : ulong sz,
358 : ulong tsorig,
359 : ulong tspub,
360 0 : fd_stem_context_t * stem ) {
361 0 : (void)in_idx; (void)seq; (void)tsorig; (void)tspub; (void)stem;
362 :
363 0 : long now = fd_tickcount();
364 0 : ctx->idle_cnt = -1L;
365 :
366 : /* Parse request (fully contained in sig field) */
367 :
368 0 : if( FD_UNLIKELY( sz!=0UL ) ) {
369 0 : FD_LOG_WARNING(( "unexpected sz %lu", sz ));
370 0 : }
371 0 : if( FD_UNLIKELY( sig>>48 ) ) {
372 0 : FD_LOG_WARNING(( "unexpected high bits in sig %016lx", sig ));
373 0 : }
374 0 : ushort if_idx = (ushort)(sig>>32);
375 0 : uint ip4_addr = (uint)sig;
376 0 : if( FD_UNLIKELY( if_idx!=ctx->neigh4_ifidx ) ) {
377 0 : ctx->metrics.neigh_solicits_fails++;
378 0 : FD_LOG_ERR(( "received neighbor solicit request for invalid interface index %u", if_idx ));
379 0 : return;
380 0 : }
381 :
382 : /* Drop if the kernel is already working on the request */
383 :
384 0 : fd_neigh4_hmap_query_t query[1];
385 0 : int spec_res = fd_neigh4_hmap_query_try( ctx->neigh4, &ip4_addr, NULL, query, 0 );
386 0 : if( spec_res==FD_MAP_SUCCESS ) {
387 0 : ctx->metrics.neigh_solicits_fails++;
388 0 : return;
389 0 : }
390 :
391 : /* Insert placeholder (take above branch next time) */
392 :
393 0 : int prepare_res = fd_neigh4_hmap_prepare( ctx->neigh4, &ip4_addr, NULL, query, 0 );
394 0 : if( FD_UNLIKELY( prepare_res!=FD_MAP_SUCCESS ) ) {
395 0 : ctx->metrics.neigh_solicits_fails++;
396 0 : return;
397 0 : }
398 0 : fd_neigh4_entry_t * ele = fd_neigh4_hmap_query_ele( query );
399 0 : ele->state = FD_NEIGH4_STATE_INCOMPLETE;
400 0 : ele->ip4_addr = ip4_addr;
401 0 : memset( ele->mac_addr, 0, 6UL );
402 0 : fd_neigh4_hmap_publish( query );
403 :
404 : /* Trigger neighbor solicit via netlink */
405 :
406 0 : int probe_res = fd_neigh4_probe_rate_limited( ctx->prober, ele, ip4_addr, now );
407 0 : if( probe_res==0 ) {
408 0 : ctx->metrics.neigh_solicits_sent++;
409 0 : } else if( probe_res>0 ) {
410 0 : ctx->metrics.neigh_solicits_fails++;
411 0 : }
412 :
413 0 : }
414 :
415 0 : #define STEM_BURST (1UL)
416 0 : #define STEM_LAZY ((ulong)13e6) /* 13ms */
417 :
418 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_netlink_tile_ctx_t
419 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_netlink_tile_ctx_t)
420 :
421 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
422 0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
423 0 : #define STEM_CALLBACK_BEFORE_CREDIT before_credit
424 0 : #define STEM_CALLBACK_AFTER_POLL_OVERRUN after_poll_overrun
425 0 : #define STEM_CALLBACK_AFTER_FRAG after_frag
426 :
427 : #include "../stem/fd_stem.c"
428 :
429 : /* End stem methods */
430 :
431 : fd_topo_run_tile_t fd_tile_netlnk = {
432 : .name = "netlnk",
433 : .populate_allowed_seccomp = populate_allowed_seccomp,
434 : .populate_allowed_fds = populate_allowed_fds,
435 : .scratch_align = scratch_align,
436 : .scratch_footprint = scratch_footprint,
437 : .privileged_init = privileged_init,
438 : .unprivileged_init = unprivileged_init,
439 : .run = stem_run
440 : };
|