Line data Source code
1 : #include "fd_snapshot_http.h"
2 : #include "../../ballet/http/picohttpparser.h"
3 : #include "fd_snapshot.h"
4 :
5 : #include <assert.h>
6 : #include <errno.h>
7 : #include <stdlib.h>
8 : #include <strings.h>
9 : #include <unistd.h>
10 : #include <netinet/in.h>
11 : #include <netinet/ip.h>
12 : #include <sys/socket.h>
13 : #include <sys/types.h>
14 :
15 : /* fd_snapshot_http_set_path renders the 'GET /path' chunk of the HTTP
16 : request. The chunk is right aligned and is followed immediately by
17 : 'HTTP/1.1\r\n...' to form a contiguous message. */
18 :
19 : int
20 : fd_snapshot_http_set_path( fd_snapshot_http_t * this,
21 : char const * path,
22 : ulong path_len,
23 6 : ulong base_slot ) {
24 :
25 6 : if( FD_UNLIKELY( !path_len ) ) {
26 0 : path = "/";
27 0 : path_len = 1UL;
28 0 : }
29 :
30 6 : if( FD_UNLIKELY( path_len > FD_SNAPSHOT_HTTP_REQ_PATH_MAX ) ) {
31 0 : FD_LOG_DEBUG(( "http: path too long (%lu chars)", path_len ));
32 0 : return 0;
33 0 : }
34 :
35 6 : ulong off = sizeof(this->path) - path_len - 4;
36 6 : char * p = this->path + off;
37 :
38 6 : fd_memcpy( p, "GET ", 4UL );
39 6 : fd_memcpy( p+4, path, path_len );
40 :
41 6 : this->req_tail = (ushort)off;
42 6 : this->path_off = (ushort)off;
43 :
44 6 : this->base_slot = base_slot;
45 6 : return 1;
46 6 : }
47 :
48 : fd_snapshot_http_t *
49 : fd_snapshot_http_new( void * mem,
50 : const char * dst_str,
51 : uint dst_ipv4,
52 : ushort dst_port,
53 6 : fd_snapshot_name_t * name_out ) {
54 :
55 6 : fd_snapshot_http_t * this = (fd_snapshot_http_t *)mem;
56 6 : if( FD_UNLIKELY( !this ) ) {
57 0 : FD_LOG_WARNING(( "NULL mem" ));
58 0 : return NULL;
59 0 : }
60 :
61 6 : fd_memset( this, 0, sizeof(fd_snapshot_http_t) );
62 6 : this->next_ipv4 = dst_ipv4;
63 6 : this->next_port = dst_port;
64 6 : this->socket_fd = -1;
65 6 : this->state = FD_SNAPSHOT_HTTP_STATE_INIT;
66 6 : this->req_timeout = 10e9; /* 10s */
67 6 : this->hops = 5;
68 6 : this->name_out = name_out;
69 6 : if( !this->name_out ) this->name_out = this->name_dummy;
70 6 : fd_memset( this->name_out, 0, sizeof(fd_snapshot_name_t) );
71 :
72 : /* Right-aligned render the request path */
73 :
74 6 : static char const default_path[] = "/snapshot.tar.bz2";
75 6 : int path_ok = fd_snapshot_http_set_path( this, default_path, sizeof(default_path)-1, 0UL );
76 6 : assert( path_ok );
77 :
78 : /* Left-aligned render the headers, completing the message */
79 :
80 0 : char * p = fd_cstr_init( this->req_hdrs );
81 6 : static char const hdr_part1[] =
82 6 : " HTTP/1.1\r\n"
83 6 : "user-agent: Firedancer\r\n"
84 6 : "accept: */*\r\n"
85 6 : "accept-encoding: identity\r\n"
86 6 : "host: ";
87 6 : p = fd_cstr_append_text( p, hdr_part1, sizeof(hdr_part1)-1 );
88 :
89 6 : p = fd_cstr_append_text( p, dst_str, strlen(dst_str) );
90 :
91 6 : static char const hdr_part2[] =
92 6 : "\r\n"
93 6 : "\r\n";
94 6 : p = fd_cstr_append_text( p, hdr_part2, sizeof(hdr_part2)-1 );
95 :
96 6 : this->req_head = (ushort)( p - this->req_buf );
97 :
98 6 : return this;
99 6 : }
100 :
101 : void *
102 6 : fd_snapshot_http_delete( fd_snapshot_http_t * this ) {
103 6 : if( FD_UNLIKELY( !this ) ) return NULL;
104 6 : if( this->socket_fd>=0 ) {
105 3 : close( this->socket_fd );
106 3 : this->socket_fd = -1;
107 3 : }
108 6 : return (void *)this;
109 6 : }
110 :
111 : /* fd_snapshot_http_init gets called the first time an object is polled
112 : for snapshot data. Creates a new outgoing TCP connection. */
113 :
114 : static int
115 0 : fd_snapshot_http_init( fd_snapshot_http_t * this ) {
116 :
117 0 : FD_LOG_INFO(( "Connecting to " FD_IP4_ADDR_FMT ":%u ...",
118 0 : FD_IP4_ADDR_FMT_ARGS( this->next_ipv4 ), this->next_port ));
119 :
120 0 : this->req_deadline = fd_log_wallclock() + this->req_timeout;
121 :
122 0 : this->socket_fd = socket( AF_INET, SOCK_STREAM, 0 );
123 0 : if( FD_UNLIKELY( this->socket_fd < 0 ) ) {
124 0 : FD_LOG_WARNING(( "socket(AF_INET, SOCK_STREAM, 0) failed (%d-%s)",
125 0 : errno, fd_io_strerror( errno ) ));
126 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
127 0 : return errno;
128 0 : }
129 :
130 0 : int optval = 4<<20;
131 0 : if( setsockopt( this->socket_fd, SOL_SOCKET, SO_RCVBUF, (char *)&optval, sizeof(int) ) < 0 ) {
132 0 : FD_LOG_WARNING(( "setsockopt failed (%d-%s)",
133 0 : errno, fd_io_strerror( errno ) ));
134 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
135 0 : return errno;
136 0 : }
137 :
138 0 : struct sockaddr_in addr = {
139 0 : .sin_family = AF_INET,
140 0 : .sin_addr = { .s_addr = this->next_ipv4 },
141 0 : .sin_port = fd_ushort_bswap( this->next_port ),
142 0 : };
143 :
144 : /* TODO consider using O_NONBLOCK socket so we can control the
145 : connect timeout interval*/
146 :
147 0 : if( 0!=connect( this->socket_fd, fd_type_pun_const( &addr ), sizeof(struct sockaddr_in) ) ) {
148 0 : FD_LOG_WARNING(( "connect(%d," FD_IP4_ADDR_FMT ":%u) failed (%d-%s)",
149 0 : this->socket_fd,
150 0 : FD_IP4_ADDR_FMT_ARGS( this->next_ipv4 ), this->next_port,
151 0 : errno, fd_io_strerror( errno ) ));
152 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
153 0 : return errno;
154 0 : }
155 :
156 0 : FD_LOG_INFO(( "Sending request" ));
157 :
158 0 : this->state = FD_SNAPSHOT_HTTP_STATE_REQ;
159 0 : return 0;
160 0 : }
161 :
162 : /* fd_snapshot_http_req writes out the request. */
163 :
164 : static int
165 3 : fd_snapshot_http_req( fd_snapshot_http_t * this ) {
166 :
167 3 : long now = fd_log_wallclock();
168 3 : long deadline = this->req_deadline;
169 :
170 3 : if( FD_UNLIKELY( now > deadline ) ) {
171 0 : FD_LOG_WARNING(( "Timed out while sending request." ));
172 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
173 0 : return ETIMEDOUT;
174 0 : }
175 :
176 3 : int socket_fd = this->socket_fd;
177 :
178 3 : uint avail_sz = (uint)this->req_head - (uint)this->req_tail;
179 3 : assert( avail_sz < sizeof(this->req_buf) );
180 0 : long sent_sz = send( socket_fd, this->req_buf + this->req_tail, avail_sz, MSG_DONTWAIT|MSG_NOSIGNAL );
181 3 : if( sent_sz<0L ) {
182 0 : if( FD_UNLIKELY( errno!=EWOULDBLOCK ) ) {
183 0 : FD_LOG_WARNING(( "send(%d,%p,%u) failed (%d-%s)",
184 0 : socket_fd, (void *)(this->req_buf + this->req_tail), avail_sz,
185 0 : errno, fd_io_strerror( errno ) ));
186 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
187 0 : return errno;
188 0 : } else {
189 0 : return 0;
190 0 : }
191 0 : }
192 :
193 3 : this->req_tail = (ushort)( this->req_tail + (uint)sent_sz );
194 3 : if( this->req_tail == this->req_head )
195 3 : this->state = FD_SNAPSHOT_HTTP_STATE_RESP;
196 :
197 3 : return 0;
198 3 : }
199 :
200 : /* fd_snapshot_http_follow_redirect winds up the state machine for a
201 : redirect. */
202 :
203 : static int
204 : fd_snapshot_http_follow_redirect( fd_snapshot_http_t * this,
205 : struct phr_header const * headers,
206 0 : ulong header_cnt ) {
207 :
208 0 : assert( this->hops > 0 );
209 0 : this->hops--;
210 :
211 : /* Look for location header */
212 :
213 0 : char const * loc = NULL;
214 0 : ulong loc_len;
215 0 : for( ulong i = 0; i<header_cnt; i++ ) {
216 0 : if( 0==strncasecmp( headers[i].name, "location", headers[i].name_len ) ) {
217 0 : loc = headers[i].value;
218 0 : loc_len = headers[i].value_len;
219 0 : break;
220 0 : }
221 0 : }
222 0 : if( FD_UNLIKELY( !loc ) ) {
223 0 : FD_LOG_WARNING(( "Invalid redirect (no location header)" ));
224 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
225 0 : return EINVAL;
226 0 : }
227 :
228 : /* Validate character set (TODO too restrictive?) */
229 :
230 0 : if( FD_UNLIKELY( loc_len > FD_SNAPSHOT_HTTP_REQ_PATH_MAX ) ) {
231 0 : FD_LOG_WARNING(( "Redirect location too long" ));
232 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
233 0 : return EINVAL;
234 0 : }
235 0 : if( FD_UNLIKELY( loc_len==0 || loc[0] != '/' ) ) {
236 0 : FD_LOG_WARNING(( "Redirect is not an absolute path on the current host. Refusing to follow." ));
237 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
238 0 : return EPROTO;
239 0 : }
240 0 : for( ulong j=0UL; j<loc_len; j++ ) {
241 0 : int c = loc[j];
242 0 : int c_ok = ( (c>='a') & (c<='z') ) |
243 0 : ( (c>='A') & (c<='Z') ) |
244 0 : ( (c>='0') & (c<='9') ) |
245 0 : (c=='.') | (c=='/') | (c=='-') | (c=='_') |
246 0 : (c=='+') | (c=='=') | (c=='&');
247 0 : if( FD_UNLIKELY( !c_ok ) ) {
248 0 : FD_LOG_WARNING(( "Invalid char '0x%02x' in redirect location", (uint)c ));
249 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
250 0 : return EPROTO;
251 0 : }
252 0 : }
253 :
254 : /* Re-initialize */
255 :
256 0 : FD_LOG_NOTICE(( "Following redirect to %.*s", (int)loc_len, loc ));
257 :
258 0 : if( FD_UNLIKELY( !fd_snapshot_name_from_buf( this->name_out, loc, loc_len, this->base_slot ) ) ) {
259 0 : return EPROTO;
260 0 : }
261 :
262 0 : int set_path_ok = fd_snapshot_http_set_path( this, loc, loc_len, this->base_slot );
263 0 : assert( set_path_ok );
264 :
265 0 : this->req_deadline = fd_log_wallclock() + this->req_timeout;
266 0 : this->state = FD_SNAPSHOT_HTTP_STATE_REQ;
267 0 : this->resp_tail = 0U;
268 0 : this->resp_head = 0U;
269 :
270 0 : return 0;
271 0 : }
272 :
273 : /* fd_snapshot_http_resp waits for response headers. */
274 :
275 : static int
276 3 : fd_snapshot_http_resp( fd_snapshot_http_t * this ) {
277 3 : long now = fd_log_wallclock();
278 3 : long deadline = this->req_deadline;
279 :
280 3 : if( FD_UNLIKELY( now > deadline ) ) {
281 0 : FD_LOG_WARNING(( "Timed out while receiving response headers." ));
282 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
283 0 : return ETIMEDOUT;
284 0 : }
285 :
286 : /* Do blocking read of TCP data until timeout */
287 :
288 3 : int socket_fd = this->socket_fd;
289 :
290 3 : uchar * next = this->resp_buf + this->resp_head;
291 3 : ulong bufsz = FD_SNAPSHOT_HTTP_RESP_BUF_MAX - this->resp_head;
292 3 : assert( this->resp_head <= FD_SNAPSHOT_HTTP_RESP_BUF_MAX );
293 :
294 0 : long recv_sz = recv( socket_fd, next, bufsz, MSG_DONTWAIT );
295 3 : if( recv_sz<0L ) {
296 0 : if( FD_UNLIKELY( errno!=EWOULDBLOCK ) ) {
297 0 : FD_LOG_WARNING(( "recv(%d,%p,%lu) failed (%d-%s)",
298 0 : socket_fd, (void *)next, bufsz,
299 0 : errno, fd_io_strerror( errno ) ));
300 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
301 0 : return errno;
302 0 : } else {
303 0 : return 0;
304 0 : }
305 3 : } else if( recv_sz==0L ) {
306 0 : return 0;
307 0 : }
308 :
309 : /* Attempt to parse response. (Might fail due to incomplete response) */
310 :
311 3 : ulong last_len = this->resp_head;
312 3 : this->resp_head += (uint)recv_sz;
313 3 : assert( this->resp_head <= FD_SNAPSHOT_HTTP_RESP_BUF_MAX );
314 :
315 0 : int minor_version;
316 3 : int status;
317 3 : char const * msg_start;
318 3 : ulong msg_len;
319 3 : struct phr_header headers[ FD_SNAPSHOT_HTTP_RESP_HDR_CNT ];
320 3 : ulong header_cnt = FD_SNAPSHOT_HTTP_RESP_HDR_CNT;
321 3 : int parse_res =
322 3 : phr_parse_response( (const char *)this->resp_buf,
323 3 : this->resp_head,
324 3 : &minor_version,
325 3 : &status,
326 3 : &msg_start,
327 3 : &msg_len,
328 3 : headers,
329 3 : &header_cnt,
330 3 : last_len );
331 :
332 3 : if( FD_UNLIKELY( parse_res==-1 ) ) {
333 0 : FD_LOG_HEXDUMP_NOTICE(( "Failed HTTP response", this->resp_buf, this->resp_head ));
334 0 : FD_LOG_WARNING(( "Failed to parse HTTP response." ));
335 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
336 0 : return EPROTO;
337 0 : }
338 :
339 3 : if( parse_res==-2 ) return 0; /* response headers incomplete */
340 3 : assert( parse_res>=0 );
341 :
342 : /* OK, we parsed the response headers.
343 : Remember where the leftover tail started so we can later reuse it
344 : during response reading. */
345 :
346 0 : this->resp_tail = (uint)parse_res;
347 :
348 : /* Is it a redirect? If so, start over. */
349 :
350 3 : int is_redirect = (int)( (status==301) | (status==303) |
351 3 : (status==304) | (status==307) );
352 3 : if( FD_UNLIKELY( (!this->hops) & (is_redirect) ) ) {
353 0 : FD_LOG_WARNING(( "Too many redirects. Aborting." ));
354 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
355 0 : return ELOOP;
356 0 : }
357 :
358 3 : if( is_redirect )
359 0 : return fd_snapshot_http_follow_redirect( this, headers, header_cnt );
360 :
361 : /* Validate response header */
362 :
363 3 : if( FD_UNLIKELY( status!=200 ) ) {
364 0 : FD_LOG_WARNING(( "Unexpected HTTP status %d", status ));
365 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
366 0 : return EPROTO;
367 0 : }
368 :
369 : /* Find content-length */
370 :
371 3 : this->content_len = ULONG_MAX;
372 3 : const ulong target_len = sizeof("content-length")-1;
373 9 : for( ulong i = 0; i < header_cnt; ++i ) {
374 9 : if( headers[i].name_len==target_len && strncasecmp( headers[i].name, "content-length", target_len ) == 0 ) {
375 3 : this->content_len = strtoul( headers[i].value, NULL, 10 );
376 3 : break;
377 3 : }
378 9 : }
379 3 : if( this->content_len == ULONG_MAX ) {
380 0 : FD_LOG_WARNING(( "Missing content-length" ));
381 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
382 0 : return EPROTO;
383 0 : }
384 :
385 : /* Start downloading */
386 :
387 3 : if( FD_UNLIKELY( this->name_out->type == FD_SNAPSHOT_TYPE_UNSPECIFIED ) ) {
388 : /* We must not have followed a redirect. Try to parse here. */
389 3 : ulong off = (ulong)this->path_off + 4;
390 3 : if( FD_UNLIKELY( !fd_snapshot_name_from_buf( this->name_out, this->path + off, sizeof(this->path) - off, this->base_slot ) ) ) {
391 3 : FD_LOG_WARNING(( "Cannot download, snapshot hash is unknown" ));
392 3 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
393 3 : return EINVAL;
394 3 : }
395 3 : }
396 :
397 0 : this->state = FD_SNAPSHOT_HTTP_STATE_DL;
398 0 : return 0;
399 3 : }
400 :
401 : /* fd_snapshot_http_dl downloads bytes and returns them to the caller.
402 : No timeout set here. */
403 :
404 : static int
405 : fd_snapshot_http_dl( fd_snapshot_http_t * this,
406 : void * dst,
407 : ulong dst_max,
408 0 : ulong * dst_sz ) {
409 :
410 0 : if( this->resp_head == this->resp_tail ) {
411 0 : if( this->content_len == this->dl_total ) {
412 0 : FD_LOG_NOTICE(( "download complete at %lu MB", this->dl_total>>20 ));
413 0 : this->state = FD_SNAPSHOT_HTTP_STATE_DONE;
414 0 : close( this->socket_fd );
415 0 : this->socket_fd = -1;
416 0 : return -1;
417 0 : }
418 0 : this->resp_tail = this->resp_head = 0U;
419 0 : long recv_sz = recv( this->socket_fd, this->resp_buf,
420 0 : fd_ulong_min( this->content_len - this->dl_total, FD_SNAPSHOT_HTTP_RESP_BUF_MAX ),
421 0 : MSG_DONTWAIT );
422 0 : if( recv_sz<0L ) {
423 0 : if( FD_UNLIKELY( errno!=EWOULDBLOCK ) ) {
424 0 : FD_LOG_WARNING(( "recv(%d,%p,%lu) failed while downloading response body (%d-%s)",
425 0 : this->socket_fd, (void *)this->resp_buf, FD_SNAPSHOT_HTTP_RESP_BUF_MAX,
426 0 : errno, fd_io_strerror( errno ) ));
427 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
428 0 : return errno;
429 0 : } else {
430 0 : return 0;
431 0 : }
432 0 : }
433 0 : if( !recv_sz ) { /* Connection closed */
434 0 : FD_LOG_WARNING(( "connection closed at %lu MB", this->dl_total>>20 ));
435 0 : this->state = FD_SNAPSHOT_HTTP_STATE_FAIL;
436 0 : close( this->socket_fd );
437 0 : this->socket_fd = -1;
438 0 : return -1;
439 0 : }
440 0 : this->resp_head = (uint)recv_sz;
441 0 : #define DL_PERIOD (100UL<<20)
442 0 : ulong x = this->dl_total/DL_PERIOD;
443 0 : this->dl_total += (ulong)recv_sz;
444 0 : if( x != this->dl_total/DL_PERIOD ) {
445 0 : FD_LOG_NOTICE(( "downloaded %lu MB (%lu%%) ...",
446 0 : this->dl_total>>20U, 100LU*this->dl_total/this->content_len ));
447 0 : }
448 0 : }
449 :
450 0 : uint avail_sz = this->resp_head - this->resp_tail;
451 0 : ulong write_sz = fd_ulong_min( avail_sz, dst_max );
452 0 : fd_memcpy( dst, this->resp_buf + this->resp_tail, write_sz );
453 0 : *dst_sz = write_sz;
454 0 : this->resp_tail += (uint)write_sz;
455 0 : return 0;
456 0 : }
457 :
458 : /* fd_snapshot_http_req gets called when we are ready to send our HTTP
459 : request for the snapshot to the server. */
460 :
461 : int
462 : fd_io_istream_snapshot_http_read( void * _this,
463 : void * dst,
464 : ulong dst_max,
465 6 : ulong * dst_sz ) {
466 :
467 6 : fd_snapshot_http_t * this = (fd_snapshot_http_t *)_this;
468 :
469 6 : int err = 0;
470 6 : switch( this->state ) {
471 0 : case FD_SNAPSHOT_HTTP_STATE_INIT:
472 0 : err = fd_snapshot_http_init( this );
473 0 : break;
474 3 : case FD_SNAPSHOT_HTTP_STATE_REQ:
475 3 : err = fd_snapshot_http_req( this );
476 3 : break;
477 3 : case FD_SNAPSHOT_HTTP_STATE_RESP:
478 3 : err = fd_snapshot_http_resp( this );
479 3 : break;
480 0 : case FD_SNAPSHOT_HTTP_STATE_DL:
481 0 : return fd_snapshot_http_dl( this, dst, dst_max, dst_sz );
482 6 : }
483 :
484 : /* Not yet ready to read at this point. */
485 :
486 6 : *dst_sz = 0UL;
487 6 : return err;
488 6 : }
489 :
/* fd_io_istream_snapshot_http_vt is the input stream virtual function
   table for the snapshot HTTP client.  Its read entry is
   fd_io_istream_snapshot_http_read. */

fd_io_istream_vt_t const fd_io_istream_snapshot_http_vt = {
  .read = fd_io_istream_snapshot_http_read,
};
|