Line data Source code
1 : #define _GNU_SOURCE
2 : #define _FILE_OFFSET_BITS 64
3 : #include "fd_funkier_filemap.h"
4 : #include <sys/types.h>
5 : #include <sys/stat.h>
6 : #include <unistd.h>
7 : #include <errno.h>
8 : #include <fcntl.h>
9 : #include <sys/mman.h>
10 :
11 0 : #define PAGESIZE (1UL<<12) /* 4 KiB */
12 :
13 : fd_funkier_t *
14 : fd_funkier_open_file( const char * filename,
15 : ulong wksp_tag,
16 : ulong seed,
17 : ulong txn_max,
18 : ulong rec_max,
19 : ulong total_sz,
20 : fd_funkier_file_mode_t mode,
21 0 : fd_funkier_close_file_args_t * close_args_out ) {
22 :
23 : /* See if we already have the file open */
24 :
25 0 : if( mode == FD_FUNKIER_READONLY || mode == FD_FUNKIER_READ_WRITE ) {
26 0 : fd_shmem_join_info_t info;
27 0 : if( !fd_shmem_join_query_by_name("funk", &info) ) {
28 0 : void * shmem = info.join;
29 0 : fd_wksp_t * wksp = fd_wksp_join( shmem );
30 0 : if( FD_UNLIKELY( !wksp ) ) {
31 0 : FD_LOG_WARNING(( "fd_wksp_join(%p) failed", shmem ));
32 0 : return NULL;
33 0 : }
34 :
35 0 : fd_wksp_tag_query_info_t info2;
36 0 : if( FD_UNLIKELY( !fd_wksp_tag_query( wksp, &wksp_tag, 1, &info2, 1 ) ) ) {
37 0 : FD_LOG_WARNING(( "%s does not contain a funky", filename ));
38 0 : return NULL;
39 0 : }
40 :
41 0 : void * funk_shmem = fd_wksp_laddr_fast( wksp, info2.gaddr_lo );
42 0 : fd_funkier_t * funk = fd_funkier_join( funk_shmem );
43 0 : if( FD_UNLIKELY( funk == NULL ) ) {
44 0 : FD_LOG_WARNING(( "failed to join a funky" ));
45 0 : return NULL;
46 0 : }
47 :
48 0 : if( FD_UNLIKELY( close_args_out != NULL ) ) {
49 0 : close_args_out->shmem = shmem;
50 0 : close_args_out->fd = -1;
51 0 : close_args_out->total_sz = 0;
52 0 : }
53 0 : return funk;
54 0 : }
55 0 : }
56 :
57 : /* Open the file */
58 :
59 0 : int open_flags, can_resize, can_create, do_new;
60 0 : switch (mode) {
61 0 : case FD_FUNKIER_READONLY:
62 0 : if( filename == NULL || filename[0] == '\0' ) {
63 0 : FD_LOG_WARNING(( "mode FD_FUNKIER_READONLY can not be used with an anonymous workspace, funk file required" ));
64 0 : return NULL;
65 0 : }
66 0 : open_flags = O_RDWR; /* We mark the memory as read-only after we are done setting up */
67 0 : can_create = 0;
68 0 : can_resize = 0;
69 0 : do_new = 0;
70 0 : break;
71 0 : case FD_FUNKIER_READ_WRITE:
72 0 : if( filename == NULL || filename[0] == '\0' ) {
73 0 : FD_LOG_WARNING(( "mode FD_FUNKIER_READ_WRITE can not be used with an anonymous workspace, funk file required" ));
74 0 : return NULL;
75 0 : }
76 0 : open_flags = O_RDWR;
77 0 : can_create = 0;
78 0 : can_resize = 0;
79 0 : do_new = 0;
80 0 : break;
81 0 : case FD_FUNKIER_CREATE:
82 0 : open_flags = O_CREAT|O_RDWR;
83 0 : can_create = 1;
84 0 : can_resize = 0;
85 0 : do_new = 0;
86 0 : break;
87 0 : case FD_FUNKIER_OVERWRITE:
88 0 : open_flags = O_CREAT|O_RDWR;
89 0 : can_create = 1;
90 0 : can_resize = 1;
91 0 : do_new = 1;
92 0 : break;
93 0 : case FD_FUNKIER_CREATE_EXCL:
94 0 : open_flags = O_CREAT|O_EXCL|O_RDWR;
95 0 : can_create = 1;
96 0 : can_resize = 1;
97 0 : do_new = 1;
98 0 : break;
99 0 : default:
100 0 : FD_LOG_WARNING(( "invalid mode when opening %s", filename ));
101 0 : return NULL;
102 0 : }
103 :
104 0 : int fd;
105 0 : if( FD_UNLIKELY( filename == NULL || filename[0] == '\0' ) ) {
106 0 : fd = -1; /* Anonymous */
107 0 : do_new = 1;
108 0 : } else {
109 :
110 : /* Open the file */
111 0 : FD_LOG_DEBUG(( "opening %s", filename ));
112 0 : fd = open( filename, open_flags, S_IRUSR|S_IWUSR );
113 0 : if( FD_UNLIKELY( fd < 0 ) ) {
114 0 : FD_LOG_WARNING(( "error opening %s: %s", filename, strerror(errno) ));
115 0 : return NULL;
116 0 : }
117 :
118 : /* Resize the file */
119 :
120 0 : struct stat statbuf;
121 0 : int r = fstat( fd, &statbuf );
122 0 : if( FD_UNLIKELY( r < 0 ) ) {
123 0 : FD_LOG_WARNING(( "error opening %s: %s", filename, strerror(errno) ));
124 0 : close( fd );
125 0 : return NULL;
126 0 : }
127 0 : if( (can_create && statbuf.st_size == 0) ||
128 0 : (can_resize && statbuf.st_size != (off_t)total_sz) ) {
129 0 : FD_LOG_DEBUG(( "resizing %s to %lu", filename, total_sz ));
130 0 : if( FD_UNLIKELY( ftruncate( fd, (off_t)total_sz ) < 0 ) ) {
131 0 : FD_LOG_WARNING(( "error resizing %s: %s", filename, strerror(errno) ));
132 0 : close( fd );
133 0 : return NULL;
134 0 : }
135 0 : do_new = 1;
136 0 : } else {
137 0 : total_sz = (ulong)statbuf.st_size;
138 0 : }
139 0 : }
140 :
141 0 : if( FD_UNLIKELY( total_sz & (PAGESIZE-1) ) ) {
142 0 : FD_LOG_WARNING(( "file size must be a multiple of a %lu", PAGESIZE ));
143 0 : close( fd );
144 0 : return NULL;
145 0 : }
146 :
147 : /* Force all the disk blocks to be physically allocated to avoid major faults in the future */
148 :
149 0 : if( do_new & (fd != -1) ) {
150 0 : FD_LOG_DEBUG(( "zeroing %s", (filename ? filename : "(NULL)") ));
151 0 : uchar zeros[4<<20];
152 0 : memset( zeros, 0, sizeof(zeros) );
153 0 : for( ulong i = 0; i < total_sz; ) {
154 0 : ulong sz = fd_ulong_min( sizeof(zeros), total_sz - i );
155 0 : if( FD_UNLIKELY( pwrite( fd, zeros, sz, (__off_t)i ) < (ssize_t)sz ) ) {
156 0 : FD_LOG_WARNING(( "error zeroing %s: %s", (filename ? filename : "(NULL)"), strerror(errno) ));
157 0 : close( fd );
158 0 : return NULL;
159 0 : }
160 0 : sync_file_range( fd, (__off64_t)i, (__off64_t)sz, SYNC_FILE_RANGE_WRITE );
161 0 : i += sz;
162 0 : }
163 0 : }
164 :
165 : /* Create the memory map */
166 :
167 0 : FD_LOG_DEBUG(( "mapping %s", (filename ? filename : "(NULL)") ));
168 0 : void * shmem = mmap( NULL, total_sz, (PROT_READ|PROT_WRITE),
169 0 : (fd == -1 ? (MAP_ANONYMOUS|MAP_PRIVATE) : MAP_SHARED), fd, 0 );
170 0 : if( FD_UNLIKELY ( shmem == MAP_FAILED ) ) {
171 0 : FD_LOG_WARNING(( "error mapping %s: %s", (filename ? filename : "(NULL)"), strerror(errno) ));
172 0 : close( fd );
173 0 : return NULL;
174 0 : }
175 :
176 0 : if( do_new ) {
177 :
178 : /* Create the data structures */
179 :
180 0 : ulong part_max = fd_wksp_part_max_est( total_sz, 1U<<18U );
181 0 : if( FD_UNLIKELY( !part_max ) ) {
182 0 : FD_LOG_WARNING(( "fd_wksp_part_max_est(%lu,64KiB) failed", total_sz ));
183 0 : munmap( shmem, total_sz );
184 0 : close( fd );
185 0 : return NULL;
186 0 : }
187 :
188 0 : ulong data_max = fd_wksp_data_max_est( total_sz, part_max );
189 0 : if( FD_UNLIKELY( !data_max ) ) {
190 0 : FD_LOG_WARNING(( "part_max (%lu) too large for footprint %lu", part_max, total_sz ));
191 0 : munmap( shmem, total_sz );
192 0 : close( fd );
193 0 : return NULL;
194 0 : }
195 :
196 0 : FD_LOG_DEBUG(( "creating workspace in %s", (filename ? filename : "(NULL)") ));
197 0 : void * shwksp = fd_wksp_new( shmem, "funk", (uint)seed, part_max, data_max );
198 0 : if( FD_UNLIKELY( !shwksp ) ) {
199 0 : FD_LOG_WARNING(( "fd_wksp_new(%p,\"%s\",%lu,%lu,%lu) failed", shmem, "funk", seed, part_max, data_max ));
200 0 : munmap( shmem, total_sz );
201 0 : close( fd );
202 0 : return NULL;
203 0 : }
204 :
205 0 : fd_wksp_t * wksp = fd_wksp_join( shwksp );
206 0 : if( FD_UNLIKELY( !wksp ) ) {
207 0 : FD_LOG_WARNING(( "fd_wksp_join(%p) failed", shwksp ));
208 0 : munmap( shmem, total_sz );
209 0 : close( fd );
210 0 : return NULL;
211 0 : }
212 :
213 0 : ulong page_sz = PAGESIZE;
214 0 : ulong page_cnt = total_sz/PAGESIZE;
215 0 : int join_err = fd_shmem_join_anonymous( "funk", FD_SHMEM_JOIN_MODE_READ_WRITE, wksp, shmem, page_sz, page_cnt );
216 0 : if( join_err ) {
217 0 : FD_LOG_WARNING(( "fd_shmem_join_anonymous failed" ));
218 0 : }
219 :
220 0 : FD_LOG_DEBUG(( "creating funk in %s", (filename ? filename : "(NULL)") ));
221 0 : void * funk_shmem = fd_wksp_alloc_laddr( wksp, fd_funkier_align(), fd_funkier_footprint( txn_max, rec_max ), wksp_tag );
222 0 : if( FD_UNLIKELY(funk_shmem == NULL ) ) {
223 0 : FD_LOG_WARNING(( "failed to allocate a funky" ));
224 0 : munmap( shmem, total_sz );
225 0 : close( fd );
226 0 : return NULL;
227 0 : }
228 :
229 0 : fd_funkier_t * funk = fd_funkier_join( fd_funkier_new( funk_shmem, wksp_tag, seed, txn_max, rec_max ) );
230 0 : if( FD_UNLIKELY( funk == NULL ) ) {
231 0 : FD_LOG_WARNING(( "failed to allocate a funky" ));
232 0 : munmap( shmem, total_sz );
233 0 : close( fd );
234 0 : return NULL;
235 0 : }
236 :
237 0 : FD_LOG_NOTICE(( "opened funk size %f GB, backing file %s", ((double)total_sz)/((double)(1LU<<30)), (filename ? filename : "(NULL)") ));
238 :
239 0 : if( FD_UNLIKELY( close_args_out != NULL ) ) {
240 0 : close_args_out->shmem = shmem;
241 0 : close_args_out->fd = fd;
242 0 : close_args_out->total_sz = total_sz;
243 0 : }
244 0 : return funk;
245 :
246 0 : } else {
247 :
248 : /* Join the data existing structures */
249 :
250 0 : fd_wksp_t * wksp = fd_wksp_join( shmem );
251 0 : if( FD_UNLIKELY( !wksp ) ) {
252 0 : FD_LOG_WARNING(( "fd_wksp_join(%p) failed", shmem ));
253 0 : munmap( shmem, total_sz );
254 0 : close( fd );
255 0 : return NULL;
256 0 : }
257 :
258 0 : ulong page_sz = PAGESIZE;
259 0 : ulong page_cnt = total_sz/PAGESIZE;
260 0 : int join_err = fd_shmem_join_anonymous( "funk", FD_SHMEM_JOIN_MODE_READ_WRITE, wksp, shmem, page_sz, page_cnt );
261 0 : if( FD_UNLIKELY( join_err ) ) {
262 0 : FD_LOG_WARNING(( "fd_shmem_join_anonymous failed" ));
263 0 : }
264 :
265 0 : fd_wksp_tag_query_info_t info;
266 0 : if( FD_UNLIKELY( !fd_wksp_tag_query( wksp, &wksp_tag, 1, &info, 1 ) ) ) {
267 0 : FD_LOG_WARNING(( "%s does not contain a funky", filename ));
268 0 : munmap( shmem, total_sz );
269 0 : close( fd );
270 0 : return NULL;
271 0 : }
272 :
273 0 : void * funk_shmem = fd_wksp_laddr_fast( wksp, info.gaddr_lo );
274 0 : fd_funkier_t * funk = fd_funkier_join( funk_shmem );
275 0 : if( FD_UNLIKELY( funk == NULL ) ) {
276 0 : FD_LOG_WARNING(( "failed to join a funky" ));
277 0 : munmap( shmem, total_sz );
278 0 : close( fd );
279 0 : return NULL;
280 0 : }
281 :
282 0 : if( mode == FD_FUNKIER_READONLY ) {
283 0 : if( FD_UNLIKELY( mprotect( shmem, total_sz, PROT_READ ) ) ) {
284 0 : FD_LOG_WARNING(( "mprotect failed (%i-%s)", errno, fd_io_strerror( errno ) ));
285 0 : }
286 0 : }
287 :
288 0 : FD_LOG_NOTICE(( "opened funk size %f GB, backing file %s", ((double)total_sz)/((double)(1LU<<30)), (filename ? filename : "(NULL)") ));
289 :
290 0 : if( FD_UNLIKELY( close_args_out != NULL ) ) {
291 0 : close_args_out->shmem = shmem;
292 0 : close_args_out->fd = fd;
293 0 : close_args_out->total_sz = total_sz;
294 0 : }
295 0 : return funk;
296 0 : }
297 0 : }
298 :
299 : fd_funkier_t *
300 : fd_funkier_recover_checkpoint( const char * funk_filename,
301 : ulong wksp_tag,
302 : const char * checkpt_filename,
303 0 : fd_funkier_close_file_args_t * close_args_out ) {
304 : /* Make the funk workspace match the parameters used to create the
305 : checkpoint. */
306 :
307 0 : fd_wksp_preview_t preview[1];
308 0 : int err = fd_wksp_preview( checkpt_filename, preview );
309 0 : if( FD_UNLIKELY( err ) ) {
310 0 : FD_LOG_WARNING(( "unable to preview %s (%i-%s)", checkpt_filename, err, fd_wksp_strerror( err ) ));
311 0 : return NULL;
312 0 : }
313 0 : uint seed = preview->seed;
314 0 : ulong part_max = preview->part_max;
315 0 : ulong data_max = preview->data_max;
316 :
317 0 : ulong total_sz = fd_wksp_footprint( part_max, data_max );
318 :
319 0 : int fd;
320 0 : if( funk_filename == NULL || funk_filename[0] == '\0' ) {
321 0 : fd = -1; /* Anonymous */
322 :
323 0 : } else {
324 :
325 : /* Open the file */
326 0 : fd = open( funk_filename, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR );
327 0 : if( FD_UNLIKELY( fd < 0 ) ) {
328 0 : FD_LOG_WARNING(( "error opening %s: %s", funk_filename, strerror(errno) ));
329 0 : return NULL;
330 0 : }
331 :
332 : /* Resize the file */
333 :
334 0 : struct stat statbuf;
335 0 : int r = fstat( fd, &statbuf );
336 0 : if( FD_UNLIKELY( r < 0 ) ) {
337 0 : FD_LOG_WARNING(( "error opening %s: %s", funk_filename, strerror(errno) ));
338 0 : close( fd );
339 0 : return NULL;
340 0 : }
341 0 : if( statbuf.st_size != (off_t)total_sz ) {
342 0 : if( FD_UNLIKELY( ftruncate( fd, (off_t)total_sz ) < 0 ) ) {
343 0 : FD_LOG_WARNING(( "error resizing %s: %s", funk_filename, strerror(errno) ));
344 0 : close( fd );
345 0 : return NULL;
346 0 : }
347 0 : }
348 :
349 : /* Force all the disk blocks to be physically allocated to avoid major faults in the future */
350 :
351 0 : uchar zeros[4<<20];
352 0 : memset( zeros, 0, sizeof(zeros) );
353 0 : for( ulong i = 0; i < total_sz; ) {
354 0 : ulong sz = fd_ulong_min( sizeof(zeros), total_sz - i );
355 0 : if( FD_UNLIKELY ( pwrite( fd, zeros, sz, (__off_t)i ) < (ssize_t)sz ) ) {
356 0 : FD_LOG_WARNING(( "error zeroing %s: %s", (funk_filename ? funk_filename : "(NULL)"), strerror(errno) ));
357 0 : close( fd );
358 0 : return NULL;
359 0 : }
360 0 : sync_file_range( fd, (__off64_t)i, (__off64_t)sz, SYNC_FILE_RANGE_WRITE );
361 0 : i += sz;
362 0 : }
363 0 : }
364 :
365 : /* Create the memory map */
366 :
367 0 : void * shmem = mmap( NULL, total_sz, PROT_READ|PROT_WRITE,
368 0 : (fd == -1 ? (MAP_ANONYMOUS|MAP_PRIVATE) : MAP_SHARED), fd, 0 );
369 :
370 0 : if( FD_UNLIKELY( shmem == MAP_FAILED ) ) {
371 0 : FD_LOG_WARNING(( "error mapping %s: %s", (funk_filename ? funk_filename : "(NULL)"), strerror(errno) ));
372 0 : close( fd );
373 0 : return NULL;
374 0 : }
375 :
376 : /* Create the workspace */
377 :
378 0 : void * shwksp = fd_wksp_new( shmem, "funk", seed, part_max, data_max );
379 0 : if( FD_UNLIKELY( !shwksp ) ) {
380 0 : FD_LOG_WARNING(( "fd_wksp_new(%p,\"%s\",%u,%lu,%lu) failed", shmem, "funk", seed, part_max, data_max ));
381 0 : munmap( shmem, total_sz );
382 0 : close( fd );
383 0 : return NULL;
384 0 : }
385 :
386 0 : fd_wksp_t * wksp = fd_wksp_join( shwksp );
387 0 : if( FD_UNLIKELY( !wksp ) ) {
388 0 : FD_LOG_WARNING(( "fd_wksp_join(%p) failed", shwksp ));
389 0 : munmap( shmem, total_sz );
390 0 : close( fd );
391 0 : return NULL;
392 0 : }
393 :
394 0 : ulong page_sz = PAGESIZE;
395 0 : ulong page_cnt = total_sz/PAGESIZE;
396 0 : int join_err = fd_shmem_join_anonymous( "funk", FD_SHMEM_JOIN_MODE_READ_WRITE, wksp, shmem, page_sz, page_cnt );
397 0 : if( FD_UNLIKELY( join_err ) ) {
398 0 : FD_LOG_WARNING(( "fd_shmem_join_anonymous failed" ));
399 0 : munmap( shmem, total_sz );
400 0 : close( fd );
401 0 : return NULL;
402 0 : }
403 :
404 : /* Restore the checkpoint */
405 :
406 0 : if( fd_wksp_restore( wksp, checkpt_filename, seed ) ) {
407 0 : FD_LOG_WARNING(( "restoring %s failed", checkpt_filename ));
408 0 : munmap( shmem, total_sz );
409 0 : close( fd );
410 0 : return NULL;
411 0 : }
412 :
413 : /* Let's play find the funk */
414 :
415 0 : fd_wksp_tag_query_info_t info;
416 0 : if( FD_UNLIKELY( !fd_wksp_tag_query( wksp, &wksp_tag, 1, &info, 1 ) ) ) {
417 0 : FD_LOG_WARNING(( "%s does not contain a funky", checkpt_filename ));
418 0 : munmap( shmem, total_sz );
419 0 : close( fd );
420 0 : return NULL;
421 0 : }
422 :
423 0 : void * funk_shmem = fd_wksp_laddr_fast( wksp, info.gaddr_lo );
424 0 : fd_funkier_t * funk = fd_funkier_join( funk_shmem );
425 0 : if( FD_UNLIKELY( funk == NULL ) ) {
426 0 : FD_LOG_WARNING(( "failed to join a funky" ));
427 0 : munmap( shmem, total_sz );
428 0 : close( fd );
429 0 : return NULL;
430 0 : }
431 :
432 0 : FD_LOG_NOTICE(( "opened funk size %f GB, backing file %s", ((double)total_sz)/((double)(1LU<<30)), (funk_filename ? funk_filename : "(NULL)") ));
433 :
434 0 : if( FD_UNLIKELY( close_args_out != NULL ) ) {
435 0 : close_args_out->shmem = shmem;
436 0 : close_args_out->fd = fd;
437 0 : close_args_out->total_sz = total_sz;
438 0 : }
439 0 : return funk;
440 0 : }
441 :
442 : void
443 0 : fd_funkier_close_file( fd_funkier_close_file_args_t * close_args ) {
444 0 : fd_shmem_leave_anonymous( close_args->shmem, NULL );
445 0 : munmap( close_args->shmem, close_args->total_sz );
446 0 : close( close_args->fd );
447 0 : }
|