Line data Source code
1 : #define _GNU_SOURCE
2 : #include <errno.h>
3 : #include <netinet/in.h>
4 : #include <sys/socket.h>
5 : #include <sys/stat.h>
6 : #include <sys/uio.h>
7 : #include "fd_udpsock.h"
8 : #include "../../util/net/fd_eth.h"
9 : #include "../../util/net/fd_ip4.h"
10 : #include "../../util/net/fd_udp.h"
11 :
/* FD_UDPSOCK_FRAME_ALIGN is the alignment of a packet frame.  Frame
   sizes (the MTU) are rounded up to a multiple of this value when the
   frame region is laid out.

   FD_UDPSOCK_HEADROOM is the number of bytes reserved at the start of
   each RX frame, ahead of the payload written by the kernel, so that
   mock Ethernet, IPv4 and UDP headers can be placed in front of it. */

#define FD_UDPSOCK_FRAME_ALIGN     (16UL)
#define FD_UDPSOCK_HEADROOM        (14UL+20UL+8UL)  /* Ethernet, IPv4, UDP */
16 :
/* fd_udpsock wraps a UDP socket file descriptor behind a pair of
   fd_aio instances.  Received datagrams get mock network headers
   prepended and are forwarded to aio_rx; packets submitted to
   aio_self have their headers parsed and stripped and are sent via
   the socket.  The fixed-size struct is followed in the same memory
   region by the variable-length arrays listed at the bottom. */

struct fd_udpsock {
  fd_aio_t         aio_self;  /* aio provided by udpsock */
  fd_aio_t const * aio_rx;    /* aio provided by receiver */

  int   fd;      /* file descriptor of actual socket (-1 when not joined) */
  uint  hdr_sz;  /* size of the mock header stack; set by fd_udpsock_set_layer */

  /* Mock Ethernet fields */

  uchar eth_self_addr[ 6 ];
  uchar eth_peer_addr[ 6 ];

  /* Mock UDP/IPv4 fields */

  uint   ip_self_addr;   /* network byte order */
  ushort udp_self_port;  /* little endian (byte swapped from the
                            network order value returned by
                            getsockname) */

  /* Pointers to variable length data structures */

  ulong               rx_cnt;    /* number of RX slots */
  struct mmsghdr *    rx_msg;    /* rx_cnt message headers for recvmmsg */
  struct iovec *      rx_iov;    /* rx_cnt iovecs, each pointing past the
                                    headroom of one RX frame */
  void *              rx_frame;  /* start of the RX frame region */
  fd_aio_pkt_info_t * rx_pkt;    /* rx_cnt packet descriptors handed to aio_rx */
  ulong               tx_cnt;    /* number of TX slots */
  struct mmsghdr *    tx_msg;    /* tx_cnt message headers for sendmmsg */
  struct iovec *      tx_iov;    /* tx_cnt iovecs, pointed at caller buffers */
  void *              tx_frame;  /* set by fd_udpsock_new to the end of the
                                    RX frame region; not otherwise used in
                                    this file */

  /* Variable length data structures follow ...

     struct mmsghdr    [ rx_cnt ]                (rx)
     struct mmsghdr    [ tx_cnt ]                (tx)
     struct iovec      [ rx_cnt ]                (rx)
     struct iovec      [ tx_cnt ]                (tx)
     uchar             [ rx_cnt ][ aligned mtu ] (rx)
     fd_aio_pkt_info_t [ rx_cnt ]                (rx)
     struct sockaddr_in[ rx_cnt ]                (rx)
     struct sockaddr_in[ tx_cnt ]                (tx) */
};
57 :
/* Forward declaration.  fd_udpsock_send is defined further below but
   fd_udpsock_new needs its address to populate aio_self.send_func. */
static int
fd_udpsock_send( void *                    ctx,
                 fd_aio_pkt_info_t const * batch,
                 ulong                     batch_cnt,
                 ulong *                   opt_batch_idx,
                 int                       flush );
65 :
66 : FD_FN_CONST ulong
67 0 : fd_udpsock_align( void ) {
68 0 : return alignof(fd_udpsock_t);
69 0 : }
70 :
71 : FD_FN_CONST ulong
72 : fd_udpsock_footprint( ulong mtu,
73 : ulong rx_pkt_cnt,
74 0 : ulong tx_pkt_cnt ) {
75 :
76 0 : if( FD_UNLIKELY( ( mtu ==0UL )
77 0 : | ( mtu <=FD_UDPSOCK_HEADROOM )
78 0 : | ( rx_pkt_cnt==0UL )
79 0 : | ( tx_pkt_cnt==0UL ) ) )
80 0 : return 0UL;
81 :
82 0 : ulong tot_pkt_cnt = rx_pkt_cnt + tx_pkt_cnt;
83 0 : ulong aligned_mtu = fd_ulong_align_up( mtu, FD_UDPSOCK_FRAME_ALIGN );
84 :
85 0 : return
86 0 : FD_LAYOUT_FINI ( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND(
87 0 : FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND(
88 0 : FD_LAYOUT_APPEND( FD_LAYOUT_INIT,
89 0 : alignof( fd_udpsock_t ), sizeof( fd_udpsock_t ) ),
90 0 : alignof( struct mmsghdr ), tot_pkt_cnt*sizeof( struct mmsghdr ) ),
91 0 : alignof( struct iovec ), tot_pkt_cnt*sizeof( struct iovec ) ),
92 0 : FD_UDPSOCK_FRAME_ALIGN, rx_pkt_cnt *aligned_mtu ),
93 0 : alignof( fd_aio_pkt_info_t ), rx_pkt_cnt *sizeof( fd_aio_pkt_info_t ) ),
94 0 : alignof( struct sockaddr_in ), tot_pkt_cnt*sizeof( struct sockaddr_in ) ),
95 0 : FD_UDPSOCK_ALIGN );
96 0 : }
97 :
98 : void *
99 : fd_udpsock_new( void * shmem,
100 : ulong mtu,
101 : ulong rx_pkt_cnt,
102 0 : ulong tx_pkt_cnt ) {
103 :
104 0 : if( FD_UNLIKELY( !shmem ) ) {
105 0 : FD_LOG_WARNING(( "NULL shmem" ));
106 0 : return NULL;
107 0 : }
108 :
109 0 : ulong laddr = (ulong)shmem;
110 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( laddr, fd_udpsock_align() ) ) ) {
111 0 : FD_LOG_WARNING(( "misaligned shmem" ));
112 0 : return NULL;
113 0 : }
114 0 : ulong footprint = fd_udpsock_footprint( mtu, rx_pkt_cnt, tx_pkt_cnt );
115 0 : if( FD_UNLIKELY( !footprint ) ) {
116 0 : FD_LOG_WARNING(( "invalid footprint for config" ));
117 0 : return NULL;
118 0 : }
119 0 : laddr += FD_LAYOUT_INIT;
120 :
121 : /* Allocate main struct */
122 :
123 0 : laddr = fd_ulong_align_up( laddr, alignof(fd_udpsock_t) );
124 0 : fd_udpsock_t * sock = (fd_udpsock_t *)laddr;
125 0 : memset( sock, 0, sizeof(fd_udpsock_t) );
126 0 : sock->fd = -1;
127 0 : sock->rx_cnt = rx_pkt_cnt;
128 0 : sock->tx_cnt = tx_pkt_cnt;
129 0 : laddr += sizeof(fd_udpsock_t);
130 :
131 0 : ulong tot_pkt_cnt = rx_pkt_cnt + tx_pkt_cnt;
132 0 : ulong aligned_mtu = fd_ulong_align_up( mtu, FD_UDPSOCK_FRAME_ALIGN );
133 :
134 : /* Set defaults for mock network headers */
135 :
136 0 : memcpy( sock->eth_self_addr, (uchar[6]){0x00, 0x00, 0x5e, 0x00, 0x53, 0x42}, 6 );
137 0 : memcpy( sock->eth_peer_addr, (uchar[6]){0x00, 0x00, 0x5e, 0x00, 0x53, 0x43}, 6 );
138 :
139 0 : sock->ip_self_addr = FD_IP4_ADDR( 0, 0, 0, 0 );
140 0 : sock->udp_self_port = 0;
141 :
142 0 : sock->aio_self = (fd_aio_t){
143 0 : .ctx = sock,
144 0 : .send_func = fd_udpsock_send
145 0 : };
146 :
147 : /* Allocate variable-length data structures */
148 :
149 0 : laddr = fd_ulong_align_up( laddr, alignof(struct mmsghdr) );
150 0 : struct mmsghdr * msg = (struct mmsghdr *)laddr;
151 0 : sock->rx_msg = msg;
152 0 : sock->tx_msg = msg + rx_pkt_cnt;
153 0 : laddr += tot_pkt_cnt*sizeof(struct mmsghdr);
154 :
155 0 : laddr = fd_ulong_align_up( laddr, alignof(struct iovec) );
156 0 : struct iovec * iov = (struct iovec *)laddr;
157 0 : sock->rx_iov = iov;
158 0 : sock->tx_iov = iov + rx_pkt_cnt;
159 0 : laddr += tot_pkt_cnt*sizeof(struct iovec);
160 :
161 0 : laddr = fd_ulong_align_up( laddr, FD_UDPSOCK_FRAME_ALIGN );
162 0 : ulong frame_base = laddr;
163 0 : sock->rx_frame = (void *)laddr;
164 0 : sock->tx_frame = (void *)(laddr + aligned_mtu*rx_pkt_cnt);
165 0 : laddr += rx_pkt_cnt*aligned_mtu;
166 :
167 0 : laddr = fd_ulong_align_up( laddr, alignof(fd_aio_pkt_info_t) );
168 0 : fd_aio_pkt_info_t * pkt = (fd_aio_pkt_info_t *)laddr;
169 0 : sock->rx_pkt = pkt;
170 0 : laddr += rx_pkt_cnt*sizeof(fd_aio_pkt_info_t);
171 :
172 0 : laddr = fd_ulong_align_up( laddr, alignof(struct sockaddr_in) );
173 0 : struct sockaddr_in * saddrs = (struct sockaddr_in *)laddr;
174 0 : laddr += tot_pkt_cnt*sizeof(struct sockaddr_in);
175 :
176 : /* Prepare iovec and msghdr buffers */
177 :
178 0 : for( ulong i=0; i<rx_pkt_cnt; i++ ) {
179 0 : iov[i].iov_base = (void *)(frame_base + i*aligned_mtu + FD_UDPSOCK_HEADROOM);
180 0 : iov[i].iov_len = aligned_mtu - FD_UDPSOCK_HEADROOM;
181 0 : msg[i].msg_hdr.msg_iov = &iov[i];
182 0 : msg[i].msg_hdr.msg_iovlen = 1;
183 0 : msg[i].msg_hdr.msg_name = &saddrs[i];
184 0 : msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_in);
185 0 : }
186 0 : for( ulong i=rx_pkt_cnt; i<tot_pkt_cnt; i++ ) {
187 0 : msg[i].msg_hdr.msg_iov = &iov[i];
188 0 : msg[i].msg_hdr.msg_iovlen = 1;
189 0 : msg[i].msg_hdr.msg_name = &saddrs[i];
190 0 : msg[i].msg_hdr.msg_namelen = sizeof(struct sockaddr_in);
191 0 : }
192 :
193 0 : fd_udpsock_set_layer( sock, FD_UDPSOCK_LAYER_ETH ); /* default */
194 0 : return shmem;
195 0 : }
196 :
197 : fd_udpsock_t *
198 : fd_udpsock_join( void * shsock,
199 0 : int fd ) {
200 :
201 0 : if( FD_UNLIKELY( !shsock ) ) {
202 0 : FD_LOG_WARNING(( "NULL shsock" ));
203 0 : return NULL;
204 0 : }
205 :
206 0 : fd_udpsock_t * sock = (fd_udpsock_t *)shsock;
207 0 : sock->fd = fd;
208 :
209 : /* Extract socket address */
210 0 : struct sockaddr addr;
211 0 : socklen_t addrlen = sizeof(addr);
212 0 : int res = getsockname( fd, &addr, &addrlen );
213 0 : if( FD_UNLIKELY( res < 0 ) ) {
214 0 : FD_LOG_WARNING(( "getsockname(%d) failed (%i-%s)", fd, errno, fd_io_strerror( errno ) ));
215 0 : return NULL;
216 0 : }
217 0 : if( FD_UNLIKELY( addr.sa_family != AF_INET ) ) {
218 0 : FD_LOG_WARNING(( "getsockname(%d) returned non-IPv4 address", fd ));
219 0 : return NULL;
220 0 : }
221 0 : struct sockaddr_in const * sin = (struct sockaddr_in const *)fd_type_pun_const( &addr );
222 0 : sock->ip_self_addr = sin->sin_addr.s_addr;
223 0 : sock->udp_self_port = fd_ushort_bswap( sin->sin_port );
224 :
225 0 : return sock;
226 0 : }
227 :
228 : void *
229 0 : fd_udpsock_leave( fd_udpsock_t * sock ) {
230 0 : if( FD_UNLIKELY( !sock ) ) {
231 0 : FD_LOG_WARNING(( "NULL sock" ));
232 0 : return NULL;
233 0 : }
234 0 : sock->fd = -1;
235 0 : return (void *)sock;
236 0 : }
237 :
/* fd_udpsock_delete releases the udpsock formatting of the region at
   shsock.  No state is modified; the pointer is simply handed back to
   the caller.  Returns NULL (with a warning logged) if shsock is
   NULL. */

void *
fd_udpsock_delete( void * shsock ) {
  if( FD_LIKELY( shsock ) ) return shsock;

  FD_LOG_WARNING(( "NULL shsock" ));
  return NULL;
}
246 :
247 : void
248 : fd_udpsock_set_rx( fd_udpsock_t * sock,
249 0 : fd_aio_t const * aio ) {
250 0 : sock->aio_rx = aio;
251 0 : }
252 :
253 : FD_FN_CONST fd_aio_t const *
254 0 : fd_udpsock_get_tx( fd_udpsock_t * sock ) {
255 0 : return &sock->aio_self;
256 0 : }
257 :
258 : void
259 0 : fd_udpsock_service( fd_udpsock_t * sock ) {
260 : /* Receive packets into iovecs */
261 :
262 0 : int fd = sock->fd;
263 0 : long res = recvmmsg( fd, sock->rx_msg, (uint)sock->rx_cnt, MSG_DONTWAIT, NULL );
264 0 : if( FD_UNLIKELY( res<0 ) ) {
265 0 : if( FD_LIKELY( (errno==EAGAIN) | (errno==EWOULDBLOCK) ) )
266 0 : return;
267 0 : FD_LOG_WARNING(( "recvmmsg(%d) failed (%i-%s)", fd, errno, fd_io_strerror( errno ) ));
268 0 : return;
269 0 : }
270 0 : ulong msg_cnt = (ulong)res;
271 :
272 : /* Create fake headers and prepare an aio batch */
273 :
274 0 : for( ulong i=0UL; i<msg_cnt; i++ ) {
275 0 : struct sockaddr_in const * addr = (struct sockaddr_in const *)sock->rx_msg[i].msg_hdr.msg_name;
276 :
277 0 : void * frame_base = (void *)( (ulong)sock->rx_iov[i].iov_base - sock->hdr_sz );
278 0 : fd_ip4_hdr_t * ip4;
279 0 : if( sock->hdr_sz==42 ) {
280 0 : fd_eth_hdr_t * eth = frame_base;
281 0 : memcpy( eth->dst, sock->eth_self_addr, 6 );
282 0 : memcpy( eth->src, sock->eth_peer_addr, 6 );
283 0 : eth->net_type = fd_ushort_bswap( FD_ETH_HDR_TYPE_IP );
284 0 : ip4 = (void *)( (ulong)eth + sizeof(fd_eth_hdr_t) );
285 0 : } else {
286 0 : ip4 = frame_base;
287 0 : }
288 :
289 0 : *ip4 = (fd_ip4_hdr_t) {
290 0 : .verihl = FD_IP4_VERIHL(4,5),
291 0 : .tos = 0,
292 0 : .net_tot_len = (ushort)( (ulong)sock->rx_msg[i].msg_len
293 0 : + sizeof(fd_ip4_hdr_t)
294 0 : + sizeof(fd_udp_hdr_t) ),
295 0 : .net_id = 0,
296 0 : .net_frag_off = 0,
297 0 : .ttl = 64,
298 0 : .protocol = FD_IP4_HDR_PROTOCOL_UDP,
299 0 : .check = 0
300 0 : };
301 : /* copy to avoid alignment issues */
302 0 : memcpy( ip4->saddr_c, &addr->sin_addr.s_addr, 4 );
303 0 : memcpy( ip4->daddr_c, &sock->ip_self_addr, 4 );
304 :
305 0 : fd_ip4_hdr_bswap( ip4 ); /* convert to "network" byte order */
306 0 : ip4->check = fd_ip4_hdr_check_fast( ip4 );
307 :
308 : /* Create UDP header with network byte order */
309 0 : fd_udp_hdr_t * udp = (fd_udp_hdr_t *)((ulong)ip4 + sizeof(fd_ip4_hdr_t));
310 0 : *udp = (fd_udp_hdr_t) {
311 0 : .net_sport = (ushort)addr->sin_port,
312 0 : .net_dport = (ushort)fd_ushort_bswap( sock->udp_self_port ),
313 0 : .net_len = (ushort)fd_ushort_bswap( (ushort)( (ulong)sock->rx_msg[i].msg_len + sizeof(fd_udp_hdr_t) ) ),
314 0 : .check = 0
315 0 : };
316 :
317 0 : sock->rx_pkt[i] = (fd_aio_pkt_info_t) {
318 0 : .buf = frame_base,
319 0 : .buf_sz = (ushort)( sock->hdr_sz + (ulong)sock->rx_msg[i].msg_len )
320 0 : };
321 0 : }
322 :
323 : /* Dispatch to recipient ignoring errors */
324 :
325 0 : fd_aio_send( sock->aio_rx, sock->rx_pkt, msg_cnt, NULL, 0 );
326 0 : }
327 :
328 : static int
329 : fd_udpsock_send( void * ctx,
330 : fd_aio_pkt_info_t const * batch,
331 : ulong batch_cnt,
332 : ulong * opt_batch_idx,
333 0 : int flush ) {
334 :
335 0 : fd_udpsock_t * sock = (fd_udpsock_t *)ctx;
336 :
337 0 : if( FD_UNLIKELY( batch_cnt == 0 ) )
338 0 : return FD_AIO_SUCCESS;
339 0 : ulong send_cnt = fd_ulong_if( batch_cnt > sock->tx_cnt, sock->tx_cnt, batch_cnt );
340 :
341 0 : ulong _dummy_batch_idx;
342 0 : opt_batch_idx = opt_batch_idx ? opt_batch_idx : &_dummy_batch_idx;
343 :
344 : /* Set up iovecs */
345 :
346 0 : ulong iov_idx = 0UL;
347 0 : for( ulong i=0UL; i<send_cnt; i++ ) {
348 0 : if( FD_UNLIKELY( batch[i].buf_sz < sock->hdr_sz ) ) continue;
349 :
350 : /* skip packets that aren't IP (like ARP) */
351 0 : fd_ip4_hdr_t * ip4;
352 0 : if( sock->hdr_sz==42 ) {
353 0 : fd_eth_hdr_t * eth = (fd_eth_hdr_t *)( (ulong)batch[i].buf );
354 0 : if( FD_UNLIKELY( eth->net_type != fd_ushort_bswap( FD_ETH_HDR_TYPE_IP ) ) ) continue;
355 0 : ip4 = (fd_ip4_hdr_t *)( (ulong)eth + sizeof(fd_eth_hdr_t) );
356 0 : } else {
357 0 : ip4 = batch[i].buf;
358 0 : }
359 :
360 0 : fd_ip4_hdr_bswap( ip4 ); /* convert to host byte order */
361 0 : uint daddr = 0;
362 0 : memcpy( &daddr, ip4->daddr_c, 4 );
363 0 : fd_udp_hdr_t * udp = (fd_udp_hdr_t *)( (ulong)ip4 + (ulong)FD_IP4_GET_LEN(*ip4) );
364 0 : fd_udp_hdr_bswap( udp ); /* convert to host byte order */
365 0 : ushort dport = udp->net_dport;
366 :
367 0 : void * payload = (void *)( (ulong)udp + sizeof(fd_udp_hdr_t) );
368 0 : sock->tx_iov[iov_idx].iov_base = payload;
369 0 : sock->tx_iov[iov_idx].iov_len = batch[i].buf_sz - (ulong)( (ulong)payload - (ulong)batch[i].buf );
370 0 : struct sockaddr_in * addr = (struct sockaddr_in *)sock->tx_msg[iov_idx].msg_hdr.msg_name;
371 0 : addr->sin_addr = (struct in_addr) { .s_addr = daddr };
372 0 : addr->sin_port = (ushort)fd_ushort_bswap( (ushort)dport );
373 :
374 0 : iov_idx++;
375 0 : }
376 0 : int fd = sock->fd;
377 0 : long res = sendmmsg( fd, sock->tx_msg, (uint)iov_idx, flush ? 0 : MSG_DONTWAIT );
378 0 : if( FD_UNLIKELY( res<0 ) ) {
379 0 : *opt_batch_idx = 0UL;
380 0 : if( FD_LIKELY( (errno==EAGAIN) | (errno==EWOULDBLOCK) ) )
381 0 : return FD_AIO_ERR_AGAIN;
382 0 : FD_LOG_WARNING(( "sendmmsg(%d) failed (%i-%s)", fd, errno, fd_io_strerror( errno ) ));
383 0 : return FD_AIO_ERR_INVAL;
384 0 : }
385 0 : ulong sent_cnt = (ulong)res;
386 :
387 0 : if( FD_UNLIKELY( iov_idx < sent_cnt ) ) {
388 0 : *opt_batch_idx = iov_idx;
389 0 : return FD_AIO_ERR_AGAIN;
390 0 : }
391 0 : return FD_AIO_SUCCESS;
392 0 : }
393 :
394 : uint
395 0 : fd_udpsock_get_ip4_address( fd_udpsock_t const * sock ) {
396 0 : return sock->ip_self_addr;
397 0 : }
398 :
399 : uint
400 0 : fd_udpsock_get_listen_port( fd_udpsock_t const * sock ) {
401 0 : return sock->udp_self_port;
402 0 : }
403 :
404 : fd_udpsock_t *
405 : fd_udpsock_set_layer( fd_udpsock_t * sock,
406 0 : uint layer ) {
407 0 : switch( layer ) {
408 0 : case FD_UDPSOCK_LAYER_ETH:
409 0 : sock->hdr_sz = sizeof(fd_eth_hdr_t) + sizeof(fd_ip4_hdr_t) + sizeof(fd_udp_hdr_t);
410 0 : break;
411 0 : case FD_UDPSOCK_LAYER_IP:
412 0 : sock->hdr_sz = sizeof(fd_ip4_hdr_t) + sizeof(fd_udp_hdr_t);
413 0 : break;
414 0 : default:
415 0 : FD_LOG_WARNING(( "invalid layer 0x%x", layer ));
416 0 : return NULL;
417 0 : }
418 0 : return sock;
419 0 : }
|