Line data Source code
1 : #define _GNU_SOURCE /* SYS_close */
2 : #include <sys/socket.h>
3 : #include <netinet/in.h>
4 : #include <netinet/tcp.h>
5 : #include <netdb.h>
6 : #include <arpa/inet.h>
7 : #include <stdint.h>
8 : #include <string.h>
9 : #include <poll.h>
10 : #include <time.h>
11 : #include <unistd.h>
12 : #include <errno.h>
13 : #include <pthread.h>
14 : #include "syscall.h"
15 : #include "fd_lookup.h"
16 :
17 : #pragma GCC diagnostic ignored "-Wconversion"
18 : #pragma GCC diagnostic ignored "-Wsign-compare"
19 : #pragma GCC diagnostic ignored "-Wsign-conversion"
20 :
/* cleanup closes every open descriptor in a sentinel-terminated pollfd
   array.  An entry with fd==-1 is an unused slot and is skipped; the
   first entry with fd<-1 terminates the walk.  The array itself is not
   modified. */

static void
cleanup( struct pollfd * pfd ) {
  for( struct pollfd * cur = pfd; cur->fd >= -1; cur++ ) {
    if( cur->fd < 0 ) continue; /* unused slot */
    syscall( SYS_close, cur->fd );
  }
}
29 :
/* mtime returns the current time in milliseconds.  CLOCK_MONOTONIC is
   read first; CLOCK_REALTIME is used only if the monotonic read fails
   with ENOSYS. */

static ulong
mtime( void ) {
  struct timespec now;
  int rc = clock_gettime( CLOCK_MONOTONIC, &now );
  if( rc < 0 && errno == ENOSYS ) {
    clock_gettime( CLOCK_REALTIME, &now );
  }
  ulong ms_from_sec = (ulong)now.tv_sec * 1000;
  long  ms_from_ns  = now.tv_nsec / 1000000;
  return ms_from_sec + ms_from_ns;
}
38 :
39 : static int
40 : start_tcp( struct pollfd * pfd,
41 : int family,
42 : void const * sa,
43 : socklen_t sl,
44 : uchar const * q,
45 0 : int ql ) {
46 0 : struct msghdr mh = {
47 0 : .msg_name = (void *)sa,
48 0 : .msg_namelen = sl,
49 0 : .msg_iovlen = 2,
50 0 : .msg_iov = (struct iovec [2]){
51 0 : { .iov_base = (uint8_t[]){ ql>>8, ql }, .iov_len = 2 },
52 0 : { .iov_base = (void *)q, .iov_len = ql } },
53 0 : .msg_control = NULL,
54 0 : .msg_controllen = 0,
55 0 : .msg_flags = 0
56 0 : };
57 0 : int fd = socket( family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0 );
58 0 : pfd->fd = fd;
59 0 : pfd->events = POLLOUT;
60 0 : if( !setsockopt( fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT,
61 0 : &(int){1}, sizeof(int) ) ) {
62 0 : int r = sendmsg( fd, &mh, MSG_FASTOPEN|MSG_NOSIGNAL );
63 0 : if( r == ql+2 ) pfd->events = POLLIN;
64 0 : if( r >= 0 ) return r;
65 0 : if( errno == EINPROGRESS ) return 0;
66 0 : }
67 0 : int r = connect( fd, sa, sl );
68 0 : if( !r || errno == EINPROGRESS ) return 0;
69 0 : close( fd );
70 0 : pfd->fd = -1;
71 0 : return -1;
72 0 : }
73 :
/* step_mh advances the iovec list of mh past its first n bytes.
   Leading iovecs that are fully covered are dropped (msg_iov is
   advanced and msg_iovlen decremented); the first iovec that is only
   partly covered has its base moved forward and its length reduced.
   The iovec entries are modified in place.  If n covers everything,
   msg_iovlen ends up 0. */

static void
step_mh( struct msghdr * mh,
         size_t          n ) {
  for(;;) {
    if( !mh->msg_iovlen ) return;

    struct iovec * head = mh->msg_iov;
    if( n < head->iov_len ) {
      /* Partial skip inside the current iovec. */
      head->iov_base = (char *)head->iov_base + n;
      head->iov_len -= n;
      return;
    }

    /* Current iovec is consumed entirely. */
    n -= head->iov_len;
    mh->msg_iov = head + 1;
    mh->msg_iovlen--;
  }
}
87 :
88 : /* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
89 : * must be sufficiently small to be safe as VLA size. In practice it's
90 : * either 1 or 2, anyway. */
91 :
92 : int
93 : fd_res_msend_rc( int nqueries,
94 : uchar const * const * queries,
95 : int const * qlens,
96 : uchar * const * answers,
97 : int * alens,
98 : int asize,
99 0 : fd_resolvconf_t const * conf ) {
100 0 : int fd;
101 0 : int servfail_retry = 0;
102 0 : union {
103 0 : struct sockaddr_in sin;
104 0 : struct sockaddr_in6 sin6;
105 0 : } sa = {0}, ns[MAXNS] = {0};
106 0 : socklen_t sl = sizeof sa.sin;
107 0 : int nns = 0;
108 0 : int family = AF_INET;
109 0 : int next;
110 0 : int i, j;
111 0 : struct pollfd pfd[nqueries+2];
112 0 : int qpos[nqueries], apos[nqueries];
113 0 : uchar alen_buf[nqueries][2];
114 :
115 0 : int timeout = 1000*conf->timeout;
116 0 : int attempts = conf->attempts;
117 :
118 0 : for( nns=0; nns<conf->nns; nns++ ) {
119 0 : const struct address *iplit = &conf->ns[nns];
120 0 : if( iplit->family == AF_INET ) {
121 0 : memcpy( &ns[nns].sin.sin_addr, iplit->addr, 4 );
122 0 : ns[nns].sin.sin_port = htons(53);
123 0 : ns[nns].sin.sin_family = AF_INET;
124 0 : } else {
125 0 : sl = sizeof sa.sin6;
126 0 : memcpy( &ns[nns].sin6.sin6_addr, iplit->addr, 16 );
127 0 : ns[nns].sin6.sin6_port = htons(53);
128 0 : ns[nns].sin6.sin6_scope_id = iplit->scopeid;
129 0 : ns[nns].sin6.sin6_family = family = AF_INET6;
130 0 : }
131 0 : }
132 :
133 : /* Get local address and open/bind a socket */
134 0 : fd = socket( family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0 );
135 :
136 : /* Handle case where system lacks IPv6 support */
137 0 : if( fd < 0 && family == AF_INET6 && errno == EAFNOSUPPORT ) {
138 0 : for( i=0; i<nns && conf->ns[nns].family == AF_INET6; i++ );
139 0 : if( i==nns ) {
140 0 : return -1;
141 0 : }
142 0 : fd = socket( AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0 );
143 0 : family = AF_INET;
144 0 : sl = sizeof sa.sin;
145 0 : }
146 :
147 : /* Convert any IPv4 addresses in a mixed environment to v4-mapped */
148 0 : if( fd >= 0 && family == AF_INET6 ) {
149 0 : setsockopt( fd, IPPROTO_IPV6, IPV6_V6ONLY, &(int){0}, sizeof 0 );
150 0 : for( i=0; i<nns; i++ ) {
151 0 : if( ns[i].sin.sin_family != AF_INET ) continue;
152 0 : memcpy( ns[i].sin6.sin6_addr.s6_addr+12, &ns[i].sin.sin_addr, 4 );
153 0 : memcpy( ns[i].sin6.sin6_addr.s6_addr, "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12 );
154 0 : ns[i].sin6.sin6_family = AF_INET6;
155 0 : ns[i].sin6.sin6_flowinfo = 0;
156 0 : ns[i].sin6.sin6_scope_id = 0;
157 0 : }
158 0 : }
159 :
160 0 : sa.sin.sin_family = family;
161 0 : if( fd < 0 || bind( fd, (void *)&sa, sl ) < 0 ) {
162 0 : if( fd >= 0 ) close( fd );
163 0 : return -1;
164 0 : }
165 :
166 : /* Past this point, there are no errors. Each individual query will
167 : * yield either no reply (indicated by zero length) or an answer
168 : * packet which is up to the caller to interpret. */
169 :
170 0 : for( i=0; i<nqueries; i++ ) pfd[i].fd = -1;
171 0 : pfd[nqueries].fd = fd;
172 0 : pfd[nqueries].events = POLLIN;
173 0 : pfd[nqueries+1].fd = -2;
174 :
175 0 : memset( alens, 0, sizeof *alens * nqueries );
176 :
177 0 : int retry_interval = timeout / attempts;
178 0 : next = 0;
179 0 : ulong t2 = mtime();
180 0 : ulong t0 = t2;
181 0 : ulong t1 = t2 - retry_interval;
182 :
183 0 : for( ; t2-t0 < timeout; t2=mtime() ) {
184 : /* This is the loop exit condition: that all queries
185 : * have an accepted answer. */
186 0 : for( i=0; i<nqueries && alens[i]>0; i++ );
187 0 : if( i==nqueries ) break;
188 :
189 0 : if( t2-t1 >= retry_interval ) {
190 : /* Query all configured namservers in parallel */
191 0 : for( i=0; i<nqueries; i++ )
192 0 : if( !alens[i] )
193 0 : for( j=0; j<nns; j++ )
194 0 : sendto( fd, queries[i],
195 0 : qlens[i], MSG_NOSIGNAL,
196 0 : (void *)&ns[j], sl );
197 0 : t1 = t2;
198 0 : servfail_retry = 2 * nqueries;
199 0 : }
200 :
201 : /* Wait for a response, or until time to retry */
202 0 : if( poll( pfd, nqueries+1, t1+retry_interval-t2 ) <= 0 ) continue;
203 :
204 0 : while( next < nqueries ) {
205 0 : struct msghdr mh = {
206 0 : .msg_name = (void *)&sa,
207 0 : .msg_namelen = sl,
208 0 : .msg_iovlen = 1,
209 0 : .msg_iov = (struct iovec []){
210 0 : { .iov_base = (void *)answers[next],
211 0 : .iov_len = asize }
212 0 : },
213 0 : .msg_control = NULL,
214 0 : .msg_controllen = 0,
215 0 : .msg_flags = 0
216 0 : };
217 0 : int rlen = recvmsg( fd, &mh, 0 );
218 0 : if( rlen < 0 ) break;
219 :
220 : /* Ignore non-identifiable packets */
221 0 : if( rlen < 4 ) continue;
222 :
223 : /* Ignore replies from addresses we didn't send to */
224 0 : for( j=0; j<nns && memcmp( ns+j, &sa, sl ); j++ );
225 0 : if( j==nns ) continue;
226 :
227 : /* Find which query this answer goes with, if any */
228 0 : for( i=next; i<nqueries && (
229 0 : answers[next][0] != queries[i][0] ||
230 0 : answers[next][1] != queries[i][1] ); i++ );
231 0 : if( i==nqueries ) continue;
232 0 : if( alens[i] ) continue;
233 :
234 : /* Only accept positive or negative responses;
235 : * retry immediately on server failure, and ignore
236 : * all other codes such as refusal. */
237 0 : switch( answers[next][3] & 15 ) {
238 0 : case 0:
239 0 : case 3:
240 0 : break;
241 0 : case 2:
242 0 : if( servfail_retry && servfail_retry-- )
243 0 : sendto( fd, queries[i], qlens[i], MSG_NOSIGNAL, (void *)&ns[j], sl );
244 0 : __attribute__((fallthrough));
245 0 : default:
246 0 : continue;
247 0 : }
248 :
249 : /* Store answer in the right slot, or update next
250 : * available temp slot if it's already in place. */
251 0 : alens[i] = rlen;
252 0 : if( i == next )
253 0 : for( ; next<nqueries && alens[next]; next++ );
254 0 : else
255 0 : memcpy( answers[i], answers[next], rlen );
256 :
257 : /* Ignore further UDP if all slots full or TCP-mode */
258 0 : if( next == nqueries ) pfd[nqueries].events = 0;
259 :
260 : /* If answer is truncated (TC bit), fallback to TCP */
261 0 : if( (answers[i][2] & 2) || (mh.msg_flags & MSG_TRUNC) ) {
262 0 : alens[i] = -1;
263 0 : int r = start_tcp( pfd+i, family, ns+j, sl, queries[i], qlens[i] );
264 0 : if( r >= 0 ) {
265 0 : qpos[i] = r;
266 0 : apos[i] = 0;
267 0 : }
268 0 : continue;
269 0 : }
270 0 : }
271 :
272 0 : for( i=0; i<nqueries; i++ ) if( pfd[i].revents & POLLOUT ) {
273 0 : struct msghdr mh = {
274 0 : .msg_iovlen = 2,
275 0 : .msg_iov = (struct iovec [2]){
276 0 : { .iov_base = (uint8_t[]){ qlens[i]>>8, qlens[i] }, .iov_len = 2 },
277 0 : { .iov_base = (void *)queries[i], .iov_len = qlens[i] } },
278 0 : .msg_control = NULL,
279 0 : .msg_controllen = 0,
280 0 : .msg_flags = 0
281 0 : };
282 0 : step_mh( &mh, qpos[i] );
283 0 : int r = sendmsg( pfd[i].fd, &mh, MSG_NOSIGNAL );
284 0 : if( r < 0 ) goto out;
285 0 : qpos[i] += r;
286 0 : if( qpos[i] == qlens[i]+2 )
287 0 : pfd[i].events = POLLIN;
288 0 : }
289 :
290 0 : for( i=0; i<nqueries; i++ ) if( pfd[i].revents & POLLIN ) {
291 0 : struct msghdr mh = {
292 0 : .msg_iovlen = 2,
293 0 : .msg_iov = (struct iovec [2]){
294 0 : { .iov_base = alen_buf[i], .iov_len = 2 },
295 0 : { .iov_base = answers[i], .iov_len = asize } },
296 0 : .msg_control = NULL,
297 0 : .msg_controllen = 0,
298 0 : .msg_flags = 0
299 0 : };
300 0 : step_mh( &mh, apos[i] );
301 0 : int r = recvmsg( pfd[i].fd, &mh, 0 );
302 0 : if( r <= 0 ) goto out;
303 0 : apos[i] += r;
304 0 : if( apos[i] < 2 ) continue;
305 0 : int alen = alen_buf[i][0]*256 + alen_buf[i][1];
306 0 : if( alen < 13 ) goto out;
307 0 : if( apos[i] < alen+2 && apos[i] < asize+2 )
308 0 : continue;
309 0 : int rcode = answers[i][3] & 15;
310 0 : if( rcode != 0 && rcode != 3 )
311 0 : goto out;
312 :
313 : /* Storing the length here commits the accepted answer.
314 : Immediately close TCP socket so as not to consume
315 : resources we no longer need. */
316 0 : alens[i] = alen;
317 0 : syscall( SYS_close, pfd[i].fd );
318 0 : pfd[i].fd = -1;
319 0 : }
320 0 : }
321 0 : out:
322 0 : cleanup( pfd );
323 :
324 : /* Disregard any incomplete TCP results */
325 0 : for( i=0; i<nqueries; i++ ) if( alens[i]<0 ) alens[i] = 0;
326 :
327 0 : return 0;
328 0 : }
|