Line data Source code
1 : #define _GNU_SOURCE
2 : #include <errno.h>
3 : #include <netinet/in.h>
4 : #include <sys/socket.h>
5 : #include <sys/stat.h>
6 : #include "fd_udpsock.h"
7 : #include "../../util/net/fd_eth.h"
8 : #include "../../util/net/fd_ip4.h"
9 : #include "../../util/net/fd_udp.h"
10 :
11 : /* FD_UDPSOCK_FRAME_ALIGN is the alignment of a packet frame */
12 :
13 0 : #define FD_UDPSOCK_FRAME_ALIGN (16UL)
14 0 : #define FD_UDPSOCK_HEADROOM (14UL+20UL+8UL) /* Ethernet, IPv4, UDP */
15 :
16 : struct fd_udpsock {
17 : fd_aio_t aio_self; /* aio provided by udpsock */
18 : fd_aio_t const * aio_rx; /* aio provided by receiver */
19 :
20 : int fd; /* file descriptor of actual socket */
21 : uint hdr_sz;
22 :
23 : /* Mock Ethernet fields */
24 :
25 : uchar eth_self_addr[ 6 ];
26 : uchar eth_peer_addr[ 6 ];
27 :
28 : /* Mock UDP/IPv4 fields */
29 :
30 : uint ip_self_addr; /* network byte order */
31 : ushort udp_self_port; /* little endian */
32 :
33 : /* Pointers to variable length data structures */
34 :
35 : ulong rx_cnt;
36 : struct mmsghdr * rx_msg;
37 : struct iovec * rx_iov;
38 : void * rx_frame;
39 : fd_aio_pkt_info_t * rx_pkt;
40 : ulong tx_cnt;
41 : struct mmsghdr * tx_msg;
42 : struct iovec * tx_iov;
43 : void * tx_frame;
44 :
45 : /* Variable length data structures follow ...
46 :
47 : struct mmsghdr [ rx_cnt ] (rx)
48 : struct mmsghdr [ tx_cnt ] (tx)
49 : struct iovec [ rx_cnt ] (rx)
50 : struct iovec [ tx_cnt ] (tx)
51 : uchar [ mtu ][ rx_cnt ] (rx)
52 : fd_aio_pkt_t [ rx_cnt ] (rx)
53 : struct sockaddr_in[ rx_cnt ] (rx)
54 : struct sockaddr_in[ tx_cnt ] (tx) */
55 : };
56 :
57 : /* Forward declaration */
58 : static int
59 : fd_udpsock_send( void * ctx,
60 : fd_aio_pkt_info_t const * batch,
61 : ulong batch_cnt,
62 : ulong * opt_batch_idx,
63 : int flush );
64 :
65 : FD_FN_CONST ulong
66 0 : fd_udpsock_align( void ) {
67 0 : return alignof(fd_udpsock_t);
68 0 : }
69 :
70 : FD_FN_CONST ulong
71 : fd_udpsock_footprint( ulong mtu,
72 : ulong rx_pkt_cnt,
73 0 : ulong tx_pkt_cnt ) {
74 :
75 0 : if( FD_UNLIKELY( ( mtu ==0UL )
76 0 : | ( mtu <=FD_UDPSOCK_HEADROOM )
77 0 : | ( rx_pkt_cnt==0UL )
78 0 : | ( tx_pkt_cnt==0UL ) ) )
79 0 : return 0UL;
80 :
81 0 : ulong tot_pkt_cnt = rx_pkt_cnt + tx_pkt_cnt;
82 0 : ulong aligned_mtu = fd_ulong_align_up( mtu, FD_UDPSOCK_FRAME_ALIGN );
83 :
84 0 : return
85 0 : FD_LAYOUT_FINI ( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND(
86 0 : FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND(
87 0 : FD_LAYOUT_APPEND( FD_LAYOUT_INIT,
88 0 : alignof( fd_udpsock_t ), sizeof( fd_udpsock_t ) ),
89 0 : alignof( struct mmsghdr ), tot_pkt_cnt*sizeof( struct mmsghdr ) ),
90 0 : alignof( struct iovec ), tot_pkt_cnt*sizeof( struct iovec ) ),
91 0 : FD_UDPSOCK_FRAME_ALIGN, rx_pkt_cnt *aligned_mtu ),
92 0 : alignof( fd_aio_pkt_info_t ), rx_pkt_cnt *sizeof( fd_aio_pkt_info_t ) ),
93 0 : alignof( struct sockaddr_in ), tot_pkt_cnt*sizeof( struct sockaddr_in ) ),
94 0 : FD_UDPSOCK_ALIGN );
95 0 : }
96 :
97 : void *
98 : fd_udpsock_new( void * shmem,
99 : ulong mtu,
100 : ulong rx_pkt_cnt,
101 0 : ulong tx_pkt_cnt ) {
102 :
103 0 : if( FD_UNLIKELY( !shmem ) ) {
104 0 : FD_LOG_WARNING(( "NULL shmem" ));
105 0 : return NULL;
106 0 : }
107 :
108 0 : ulong laddr = (ulong)shmem;
109 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( laddr, fd_udpsock_align() ) ) ) {
110 0 : FD_LOG_WARNING(( "misaligned shmem" ));
111 0 : return NULL;
112 0 : }
113 0 : ulong footprint = fd_udpsock_footprint( mtu, rx_pkt_cnt, tx_pkt_cnt );
114 0 : if( FD_UNLIKELY( !footprint ) ) {
115 0 : FD_LOG_WARNING(( "invalid footprint for config" ));
116 0 : return NULL;
117 0 : }
118 0 : laddr += FD_LAYOUT_INIT;
119 :
120 : /* Allocate main struct */
121 :
122 0 : laddr = fd_ulong_align_up( laddr, alignof(fd_udpsock_t) );
123 0 : fd_udpsock_t * sock = (fd_udpsock_t *)laddr;
124 0 : memset( sock, 0, sizeof(fd_udpsock_t) );
125 0 : sock->fd = -1;
126 0 : sock->rx_cnt = rx_pkt_cnt;
127 0 : sock->tx_cnt = tx_pkt_cnt;
128 0 : laddr += sizeof(fd_udpsock_t);
129 :
130 0 : ulong tot_pkt_cnt = rx_pkt_cnt + tx_pkt_cnt;
131 0 : ulong aligned_mtu = fd_ulong_align_up( mtu, FD_UDPSOCK_FRAME_ALIGN );
132 :
133 : /* Set defaults for mock network headers */
134 :
135 0 : memcpy( sock->eth_self_addr, (uchar[6]){0x00, 0x00, 0x5e, 0x00, 0x53, 0x42}, 6 );
136 0 : memcpy( sock->eth_peer_addr, (uchar[6]){0x00, 0x00, 0x5e, 0x00, 0x53, 0x43}, 6 );
137 :
138 0 : sock->ip_self_addr = FD_IP4_ADDR( 0, 0, 0, 0 );
139 0 : sock->udp_self_port = 0;
140 :
141 0 : sock->aio_self = (fd_aio_t){
142 0 : .ctx = sock,
143 0 : .send_func = fd_udpsock_send
144 0 : };
145 :
146 : /* Allocate variable-length data structures */
147 :
148 0 : laddr = fd_ulong_align_up( laddr, alignof(struct mmsghdr) );
149 0 : struct mmsghdr * msg = (struct mmsghdr *)laddr;
150 0 : sock->rx_msg = msg;
151 0 : sock->tx_msg = msg + rx_pkt_cnt;
152 0 : laddr += tot_pkt_cnt*sizeof(struct mmsghdr);
153 :
154 0 : laddr = fd_ulong_align_up( laddr, alignof(struct iovec) );
155 0 : struct iovec * iov = (struct iovec *)laddr;
156 0 : sock->rx_iov = iov;
157 0 : sock->tx_iov = iov + rx_pkt_cnt;
158 0 : laddr += tot_pkt_cnt*sizeof(struct iovec);
159 :
160 0 : laddr = fd_ulong_align_up( laddr, FD_UDPSOCK_FRAME_ALIGN );
161 0 : ulong frame_base = laddr;
162 0 : sock->rx_frame = (void *)laddr;
163 0 : sock->tx_frame = (void *)(laddr + aligned_mtu*rx_pkt_cnt);
164 0 : laddr += rx_pkt_cnt*aligned_mtu;
165 :
166 0 : laddr = fd_ulong_align_up( laddr, alignof(fd_aio_pkt_info_t) );
167 0 : fd_aio_pkt_info_t * pkt = (fd_aio_pkt_info_t *)laddr;
168 0 : sock->rx_pkt = pkt;
169 0 : laddr += rx_pkt_cnt*sizeof(fd_aio_pkt_info_t);
170 :
171 0 : laddr = fd_ulong_align_up( laddr, alignof(struct sockaddr_in) );
172 0 : struct sockaddr_in * saddrs = (struct sockaddr_in *)laddr;
173 0 : laddr += tot_pkt_cnt*sizeof(struct sockaddr_in);
174 :
175 : /* Prepare iovec and msghdr buffers */
176 :
177 0 : for( ulong i=0; i<rx_pkt_cnt; i++ ) {
178 0 : iov[i].iov_base = (void *)(frame_base + i*aligned_mtu + FD_UDPSOCK_HEADROOM);
179 0 : iov[i].iov_len = aligned_mtu - FD_UDPSOCK_HEADROOM;
180 0 : msg[i].msg_hdr.msg_iov = &iov[i];
181 0 : msg[i].msg_hdr.msg_iovlen = 1;
182 0 : msg[i].msg_hdr.msg_name = &saddrs[i];
183 0 : msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_in);
184 0 : }
185 0 : for( ulong i=rx_pkt_cnt; i<tot_pkt_cnt; i++ ) {
186 0 : msg[i].msg_hdr.msg_iov = &iov[i];
187 0 : msg[i].msg_hdr.msg_iovlen = 1;
188 0 : msg[i].msg_hdr.msg_name = &saddrs[i];
189 0 : msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_in);
190 0 : }
191 :
192 0 : fd_udpsock_set_layer( sock, FD_UDPSOCK_LAYER_ETH ); /* default */
193 0 : return shmem;
194 0 : }
195 :
196 : fd_udpsock_t *
197 : fd_udpsock_join( void * shsock,
198 0 : int fd ) {
199 :
200 0 : if( FD_UNLIKELY( !shsock ) ) {
201 0 : FD_LOG_WARNING(( "NULL shsock" ));
202 0 : return NULL;
203 0 : }
204 :
205 0 : fd_udpsock_t * sock = (fd_udpsock_t *)shsock;
206 0 : sock->fd = fd;
207 :
208 : /* Extract socket address */
209 0 : struct sockaddr addr;
210 0 : socklen_t addrlen = sizeof(addr);
211 0 : int res = getsockname( fd, &addr, &addrlen );
212 0 : if( FD_UNLIKELY( res < 0 ) ) {
213 0 : FD_LOG_WARNING(( "getsockname(%d) failed (%i-%s)", fd, errno, fd_io_strerror( errno ) ));
214 0 : return NULL;
215 0 : }
216 0 : if( FD_UNLIKELY( addr.sa_family != AF_INET ) ) {
217 0 : FD_LOG_WARNING(( "getsockname(%d) returned non-IPv4 address", fd ));
218 0 : return NULL;
219 0 : }
220 0 : struct sockaddr_in const * sin = (struct sockaddr_in const *)fd_type_pun_const( &addr );
221 0 : sock->ip_self_addr = sin->sin_addr.s_addr;
222 0 : sock->udp_self_port = fd_ushort_bswap( sin->sin_port );
223 :
224 0 : return sock;
225 0 : }
226 :
227 : void *
228 0 : fd_udpsock_leave( fd_udpsock_t * sock ) {
229 0 : if( FD_UNLIKELY( !sock ) ) {
230 0 : FD_LOG_WARNING(( "NULL sock" ));
231 0 : return NULL;
232 0 : }
233 0 : sock->fd = -1;
234 0 : return (void *)sock;
235 0 : }
236 :
237 : void *
238 0 : fd_udpsock_delete( void * shsock ) {
239 0 : if( FD_UNLIKELY( !shsock ) ) {
240 0 : FD_LOG_WARNING(( "NULL shsock" ));
241 0 : return NULL;
242 0 : }
243 0 : return shsock;
244 0 : }
245 :
246 : void
247 : fd_udpsock_set_rx( fd_udpsock_t * sock,
248 0 : fd_aio_t const * aio ) {
249 0 : sock->aio_rx = aio;
250 0 : }
251 :
252 : FD_FN_CONST fd_aio_t const *
253 0 : fd_udpsock_get_tx( fd_udpsock_t * sock ) {
254 0 : return &sock->aio_self;
255 0 : }
256 :
257 : void
258 0 : fd_udpsock_service( fd_udpsock_t * sock ) {
259 : /* Receive packets into iovecs */
260 :
261 0 : int fd = sock->fd;
262 0 : long res = recvmmsg( fd, sock->rx_msg, (uint)sock->rx_cnt, MSG_DONTWAIT, NULL );
263 0 : if( FD_UNLIKELY( res<0 ) ) {
264 0 : if( FD_LIKELY( (errno==EAGAIN) | (errno==EWOULDBLOCK) ) )
265 0 : return;
266 0 : FD_LOG_WARNING(( "recvmmsg(%d) failed (%i-%s)", fd, errno, fd_io_strerror( errno ) ));
267 0 : return;
268 0 : }
269 0 : ulong msg_cnt = (ulong)res;
270 :
271 : /* Create fake headers and prepare an aio batch */
272 :
273 0 : for( ulong i=0UL; i<msg_cnt; i++ ) {
274 0 : struct sockaddr_in const * addr = (struct sockaddr_in const *)sock->rx_msg[i].msg_hdr.msg_name;
275 :
276 0 : void * frame_base = (void *)( (ulong)sock->rx_iov[i].iov_base - sock->hdr_sz );
277 0 : fd_ip4_hdr_t * ip4;
278 0 : if( sock->hdr_sz==42 ) {
279 0 : fd_eth_hdr_t * eth = frame_base;
280 0 : memcpy( eth->dst, sock->eth_self_addr, 6 );
281 0 : memcpy( eth->src, sock->eth_peer_addr, 6 );
282 0 : eth->net_type = fd_ushort_bswap( FD_ETH_HDR_TYPE_IP );
283 0 : ip4 = (void *)( (ulong)eth + sizeof(fd_eth_hdr_t) );
284 0 : } else {
285 0 : ip4 = frame_base;
286 0 : }
287 :
288 0 : *ip4 = (fd_ip4_hdr_t) {
289 0 : .verihl = FD_IP4_VERIHL(4,5),
290 0 : .tos = 0,
291 0 : .net_tot_len = (ushort)( (ulong)sock->rx_msg[i].msg_len
292 0 : + sizeof(fd_ip4_hdr_t)
293 0 : + sizeof(fd_udp_hdr_t) ),
294 0 : .net_id = 0,
295 0 : .net_frag_off = 0,
296 0 : .ttl = 64,
297 0 : .protocol = FD_IP4_HDR_PROTOCOL_UDP,
298 0 : .check = 0
299 0 : };
300 : /* copy to avoid alignment issues */
301 0 : memcpy( ip4->saddr_c, &addr->sin_addr.s_addr, 4 );
302 0 : memcpy( ip4->daddr_c, &sock->ip_self_addr, 4 );
303 :
304 0 : fd_ip4_hdr_bswap( ip4 ); /* convert to "network" byte order */
305 0 : ip4->check = fd_ip4_hdr_check_fast( ip4 );
306 :
307 : /* Create UDP header with network byte order */
308 0 : fd_udp_hdr_t * udp = (fd_udp_hdr_t *)((ulong)ip4 + sizeof(fd_ip4_hdr_t));
309 0 : *udp = (fd_udp_hdr_t) {
310 0 : .net_sport = (ushort)addr->sin_port,
311 0 : .net_dport = (ushort)fd_ushort_bswap( sock->udp_self_port ),
312 0 : .net_len = (ushort)fd_ushort_bswap( (ushort)( (ulong)sock->rx_msg[i].msg_len + sizeof(fd_udp_hdr_t) ) ),
313 0 : .check = 0
314 0 : };
315 :
316 0 : sock->rx_pkt[i] = (fd_aio_pkt_info_t) {
317 0 : .buf = frame_base,
318 0 : .buf_sz = (ushort)( sock->hdr_sz + (ulong)sock->rx_msg[i].msg_len )
319 0 : };
320 0 : }
321 :
322 : /* Dispatch to recipient ignoring errors */
323 :
324 0 : fd_aio_send( sock->aio_rx, sock->rx_pkt, msg_cnt, NULL, 0 );
325 0 : }
326 :
327 : static int
328 : fd_udpsock_send( void * ctx,
329 : fd_aio_pkt_info_t const * batch,
330 : ulong batch_cnt,
331 : ulong * opt_batch_idx,
332 0 : int flush ) {
333 :
334 0 : fd_udpsock_t * sock = (fd_udpsock_t *)ctx;
335 :
336 0 : if( FD_UNLIKELY( batch_cnt == 0 ) )
337 0 : return FD_AIO_SUCCESS;
338 0 : ulong send_cnt = fd_ulong_if( batch_cnt > sock->tx_cnt, sock->tx_cnt, batch_cnt );
339 :
340 0 : ulong _dummy_batch_idx;
341 0 : opt_batch_idx = opt_batch_idx ? opt_batch_idx : &_dummy_batch_idx;
342 :
343 : /* Set up iovecs */
344 :
345 0 : ulong iov_idx = 0UL;
346 0 : for( ulong i=0UL; i<send_cnt; i++ ) {
347 0 : if( FD_UNLIKELY( batch[i].buf_sz < sock->hdr_sz ) ) continue;
348 :
349 : /* skip packets that aren't IP (like ARP) */
350 0 : fd_ip4_hdr_t * ip4;
351 0 : if( sock->hdr_sz==42 ) {
352 0 : fd_eth_hdr_t * eth = (fd_eth_hdr_t *)( (ulong)batch[i].buf );
353 0 : if( FD_UNLIKELY( eth->net_type != fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) ) ) continue;
354 0 : ip4 = (fd_ip4_hdr_t *)( (ulong)eth + sizeof(fd_eth_hdr_t) );
355 0 : } else {
356 0 : ip4 = batch[i].buf;
357 0 : }
358 :
359 0 : fd_ip4_hdr_bswap( ip4 ); /* convert to host byte order */
360 0 : uint daddr = 0;
361 0 : memcpy( &daddr, ip4->daddr_c, 4 );
362 0 : fd_udp_hdr_t * udp = (fd_udp_hdr_t *)( (ulong)ip4 + (ulong)FD_IP4_GET_LEN(*ip4) );
363 0 : fd_udp_hdr_bswap( udp ); /* convert to host byte order */
364 0 : ushort dport = udp->net_dport;
365 :
366 0 : void * payload = (void *)( (ulong)udp + sizeof(fd_udp_hdr_t) );
367 0 : sock->tx_iov[iov_idx].iov_base = payload;
368 0 : sock->tx_iov[iov_idx].iov_len = batch[i].buf_sz - (ulong)( (ulong)payload - (ulong)batch[i].buf );
369 0 : struct sockaddr_in * addr = (struct sockaddr_in *)sock->tx_msg[iov_idx].msg_hdr.msg_name;
370 0 : addr->sin_addr = (struct in_addr) { .s_addr = daddr };
371 0 : addr->sin_port = (ushort)fd_ushort_bswap( (ushort)dport );
372 :
373 0 : iov_idx++;
374 0 : }
375 0 : int fd = sock->fd;
376 0 : long res = sendmmsg( fd, sock->tx_msg, (uint)iov_idx, flush ? 0 : MSG_DONTWAIT );
377 0 : if( FD_UNLIKELY( res<0 ) ) {
378 0 : *opt_batch_idx = 0UL;
379 0 : if( FD_LIKELY( (errno==EAGAIN) | (errno==EWOULDBLOCK) ) )
380 0 : return FD_AIO_ERR_AGAIN;
381 0 : FD_LOG_WARNING(( "sendmmsg(%d) failed (%i-%s)", fd, errno, fd_io_strerror( errno ) ));
382 0 : return FD_AIO_ERR_INVAL;
383 0 : }
384 0 : ulong sent_cnt = (ulong)res;
385 :
386 0 : if( FD_UNLIKELY( iov_idx < sent_cnt ) ) {
387 0 : *opt_batch_idx = iov_idx;
388 0 : return FD_AIO_ERR_AGAIN;
389 0 : }
390 0 : return FD_AIO_SUCCESS;
391 0 : }
392 :
393 : uint
394 0 : fd_udpsock_get_ip4_address( fd_udpsock_t const * sock ) {
395 0 : return sock->ip_self_addr;
396 0 : }
397 :
398 : uint
399 0 : fd_udpsock_get_listen_port( fd_udpsock_t const * sock ) {
400 0 : return sock->udp_self_port;
401 0 : }
402 :
403 : fd_udpsock_t *
404 : fd_udpsock_set_layer( fd_udpsock_t * sock,
405 0 : uint layer ) {
406 0 : switch( layer ) {
407 0 : case FD_UDPSOCK_LAYER_ETH:
408 0 : sock->hdr_sz = sizeof(fd_eth_hdr_t) + sizeof(fd_ip4_hdr_t) + sizeof(fd_udp_hdr_t);
409 0 : break;
410 0 : case FD_UDPSOCK_LAYER_IP:
411 0 : sock->hdr_sz = sizeof(fd_ip4_hdr_t) + sizeof(fd_udp_hdr_t);
412 0 : break;
413 0 : default:
414 0 : FD_LOG_WARNING(( "invalid layer 0x%x", layer ));
415 0 : return NULL;
416 0 : }
417 0 : return sock;
418 0 : }
|