Line data Source code
1 : /* For O_DIRECT and O_NOATIME */
2 : #define _GNU_SOURCE
3 :
4 : #include "fd_vinyl.h"
5 : #include "../util/pod/fd_pod.h"
6 :
7 : #include <stdio.h>
8 : #include <errno.h>
9 : #include <unistd.h>
10 : #include <fcntl.h>
11 : #include <sys/stat.h>
12 :
13 : FD_IMPORT_CSTR( fd_vinyl_ctl_help, "src/vinyl/fd_vinyl_ctl_help" );
14 :
15 : static int
16 : fd_vinyl_main( int argc,
17 0 : char ** argv ) {
18 :
19 0 : ulong seed_default = fd_cstr_hash_append( (ulong)fd_log_wallclock(), fd_log_host() );
20 :
21 0 : char const * _pod = fd_env_strip_cmdline_cstr ( &argc, &argv, "--pod", NULL, NULL );
22 0 : char const * _cfg = fd_env_strip_cmdline_cstr ( &argc, &argv, "--cfg", NULL, NULL );
23 0 : ulong seed = fd_env_strip_cmdline_ulong( &argc, &argv, "--seed", NULL, seed_default );
24 0 : char const * type = fd_env_strip_cmdline_cstr ( &argc, &argv, "--type", NULL, "mm" );
25 0 : char const * path = fd_env_strip_cmdline_cstr ( &argc, &argv, "--path", NULL, NULL );
26 0 : int dsync = fd_env_strip_cmdline_int ( &argc, &argv, "--dsync", NULL, 0 );
27 0 : int direct = fd_env_strip_cmdline_int ( &argc, &argv, "--direct", NULL, 0 );
28 0 : int noatime = fd_env_strip_cmdline_int ( &argc, &argv, "--noatime", NULL, 0 );
29 0 : char const * _page_sz = fd_env_strip_cmdline_cstr ( &argc, &argv, "--page-sz", NULL, "gigantic" );
30 0 : ulong page_cnt = fd_env_strip_cmdline_ulong( &argc, &argv, "--page-cnt", NULL, 1UL );
31 0 : ulong near_cpu = fd_env_strip_cmdline_ulong( &argc, &argv, "--near-cpu", NULL, fd_log_cpu_id() );
32 0 : int reset = fd_env_strip_cmdline_int ( &argc, &argv, "--reset", NULL, 0 );
33 0 : char const * info = fd_env_strip_cmdline_cstr ( &argc, &argv, "--info", NULL, NULL );
34 0 : ulong io_seed = fd_env_strip_cmdline_ulong( &argc, &argv, "--io-seed", NULL, 0UL );
35 :
36 0 : int open_flags = O_RDWR | (dsync ? O_DSYNC : 0 ) | (direct ? O_DIRECT : 0) | (noatime ? O_NOATIME : 0);
37 0 : ulong page_sz = fd_cstr_to_shmem_page_sz( _page_sz );
38 0 : ulong info_sz = info ? (strlen( info )+1UL) : 0UL;
39 :
40 0 : if( FD_UNLIKELY( !_pod ) ) FD_LOG_ERR(( "--pod not specified" ));
41 0 : if( FD_UNLIKELY( !page_sz ) ) FD_LOG_ERR(( "bad --page-sz" ));
42 :
43 0 : FD_LOG_NOTICE(( "Attaching to --pod %s", _pod ));
44 :
45 0 : uchar const * pod = fd_wksp_pod_attach( _pod ); /* logs details, guaranteed to succeed */
46 0 : uchar const * cfg;
47 0 : if( FD_UNLIKELY( !_cfg ) ) {
48 0 : FD_LOG_NOTICE(( "--cfg not specified (using pod root for config)" ));
49 0 : cfg = pod;
50 0 : } else {
51 0 : FD_LOG_NOTICE(( "Finding config --cfg %s", _cfg ));
52 0 : cfg = fd_pod_query_subpod( pod, _cfg );
53 0 : if( FD_UNLIKELY( !cfg ) ) FD_LOG_ERR(( "config not found" ));
54 0 : }
55 :
56 0 : FD_LOG_NOTICE(( "Extracting pod configuration" ));
57 :
58 : /* See below for explanation of defaults */
59 0 : ulong spad_max = fd_pod_query_ulong( cfg, "spad_max", fd_vinyl_io_spad_est() );
60 0 : ulong async_min = fd_pod_query_ulong( cfg, "async_min", 2UL );
61 0 : ulong async_max = fd_pod_query_ulong( cfg, "async_max", 2UL*async_min );
62 0 : ulong part_thresh = fd_pod_query_ulong( cfg, "part_thresh", 1UL<<30 );
63 0 : ulong gc_thresh = fd_pod_query_ulong( cfg, "gc_thresh", 8UL<<30 );
64 0 : int gc_eager = fd_pod_query_int ( cfg, "gc_eager", 2 );
65 0 : int style = fd_pod_query_int ( cfg, "style", FD_VINYL_BSTREAM_CTL_STYLE_LZ4 );
66 0 : int level = fd_pod_query_int ( cfg, "level", 1 );
67 :
68 0 : FD_LOG_NOTICE(( "Processing command line configuration overrides" ));
69 :
70 0 : char const * _style = fd_env_strip_cmdline_cstr( &argc, &argv, "--style", NULL, NULL );
71 0 : if( _style ) style = fd_cstr_to_vinyl_bstream_ctl_style( _style );
72 :
73 0 : spad_max = fd_env_strip_cmdline_ulong( &argc, &argv, "--spad-max", NULL, spad_max );
74 0 : async_min = fd_env_strip_cmdline_ulong( &argc, &argv, "--async-min", NULL, async_min );
75 0 : async_max = fd_env_strip_cmdline_ulong( &argc, &argv, "--async-max", NULL, async_max );
76 0 : part_thresh = fd_env_strip_cmdline_ulong( &argc, &argv, "--part-thresh", NULL, part_thresh );
77 0 : gc_thresh = fd_env_strip_cmdline_ulong( &argc, &argv, "--gc-thresh", NULL, gc_thresh );
78 0 : gc_eager = fd_env_strip_cmdline_int ( &argc, &argv, "--gc-eager", NULL, gc_eager );
79 0 : level = fd_env_strip_cmdline_int ( &argc, &argv, "--level", NULL, level );
80 :
81 0 : FD_LOG_NOTICE(( "Mapping vinyl memory regions" ));
82 :
83 0 : void * _vinyl = fd_wksp_pod_map( cfg, "vinyl" ); ulong vinyl_footprint = fd_pod_query_ulong( cfg, "vinyl_footprint", 0UL );
84 0 : void * _cnc = fd_wksp_pod_map( cfg, "cnc" ); ulong cnc_footprint = fd_pod_query_ulong( cfg, "cnc_footprint", 0UL );
85 0 : void * _meta = fd_wksp_pod_map( cfg, "meta" ); ulong meta_footprint = fd_pod_query_ulong( cfg, "meta_footprint", 0UL );
86 0 : void * _line = fd_wksp_pod_map( cfg, "line" ); ulong line_footprint = fd_pod_query_ulong( cfg, "line_footprint", 0UL );
87 0 : void * _io = fd_wksp_pod_map( cfg, "io" ); ulong io_footprint = fd_pod_query_ulong( cfg, "io_footprint", 0UL );
88 0 : void * _ele = fd_wksp_pod_map( cfg, "ele" ); ulong ele_footprint = fd_pod_query_ulong( cfg, "ele_footprint", 0UL );
89 0 : void * _obj = fd_wksp_pod_map( cfg, "obj" ); ulong obj_footprint = fd_pod_query_ulong( cfg, "obj_footprint", 0UL );
90 :
91 0 : # define TEST( c, msg ) do { \
92 0 : if( FD_UNLIKELY( !(c) ) ) FD_LOG_ERR(( "FAIL: %s (%s)", #c, (msg) )); \
93 0 : } while(0)
94 :
95 0 : fd_wksp_t * wksp = fd_wksp_containing( _obj );
96 0 : TEST( wksp, "fd_wksp_containing failed" );
97 :
98 0 : TEST( fd_ulong_is_aligned( (ulong)_vinyl, fd_vinyl_io_mm_align() ), "bad alloc" );
99 0 : TEST( vinyl_footprint >= fd_vinyl_footprint(), "bad alloc" );
100 :
101 0 : int is_mmio = !strcmp( type, "mm" );
102 :
103 0 : FD_LOG_NOTICE(( "io config"
104 0 : "\n\t--type \"%s\""
105 0 : "\n\t--spad-max %lu bytes"
106 0 : "\n\t--path \"%s\""
107 0 : "\n\t--dsync %i"
108 0 : "\n\t--direct %i"
109 0 : "\n\t--noatime %i"
110 0 : "\n\t--page-sz \"%s\"%s"
111 0 : "\n\t--page-cnt %lu pages%s"
112 0 : "\n\t--near-cpu %lu%s"
113 0 : "\n\t--reset %i"
114 0 : "\n\t--info \"%s\" (info_sz %lu bytes%s)"
115 0 : "\n\t--io-seed 0x%016lx%s",
116 0 : type, spad_max, path ? path : "(null)", dsync, direct, noatime,
117 0 : _page_sz, is_mmio && !path ? "" : " (ignored)",
118 0 : page_cnt, is_mmio && !path ? "" : " (ignored)",
119 0 : near_cpu, is_mmio && !path ? "" : " (ignored)",
120 0 : reset, info ? info : "(null)", info_sz, reset ? "" : ", ignored", io_seed, reset ? "" : " (ignored)" ));
121 :
122 0 : FD_LOG_NOTICE(( "Joining bstream" ));
123 :
124 0 : int bstream_type;
125 0 : int fd = -1;
126 0 : void * mmio;
127 0 : ulong mmio_sz;
128 :
129 0 : fd_vinyl_io_t * io;
130 :
131 0 : if( FD_LIKELY( is_mmio ) ) {
132 :
133 0 : if( FD_LIKELY( path ) ) {
134 :
135 0 : fd = open( path, open_flags, (mode_t)0 );
136 :
137 0 : if( FD_LIKELY( fd!=-1 ) ) { /* --path seems to be file (e.g. testing or basic I/O with weak persistence) */
138 :
139 0 : TEST( !direct, "--direct 1 not supported with --type mm and file --path" );
140 : /* FIXME: is dsync valid for mmio? (unclear) noatime? (probably) */
141 :
142 0 : FD_LOG_NOTICE(( "Using file at --path as a memory mapped bstream" ));
143 :
144 0 : bstream_type = 0;
145 :
146 0 : int err = fd_io_mmio_init( fd, FD_IO_MMIO_MODE_READ_WRITE, &mmio, &mmio_sz );
147 0 : if( FD_UNLIKELY( err ) ) FD_LOG_ERR(( "fd_io_mmio_init failed (%i-%s)", err, fd_io_strerror( err ) ));
148 :
149 0 : } else { /* --path doesn't seem to be a file, use shmem (e.g. testing or ultra HPC with weak persistence) */
150 :
151 0 : FD_LOG_NOTICE(( "Using shmem region at --path as a memory mapped bstream (ignoring --dsync, --direct and --noatime)" ));
152 :
153 0 : bstream_type = 1;
154 :
155 0 : fd_shmem_join_info_t info[1];
156 0 : mmio = fd_shmem_join( path, FD_SHMEM_JOIN_MODE_READ_WRITE, NULL, NULL, info );
157 0 : TEST( mmio, "fd_shmem_join failed" );
158 0 : mmio_sz = info->page_sz * info->page_cnt;
159 :
160 0 : }
161 :
162 0 : } else { /* No --path, use an anonymous region (e.g. testing or ultra HPC with no persistence) */
163 :
164 0 : FD_LOG_NOTICE(( "Using an anonymous shmem region as a memory mapped bstream "
165 0 : "(ignoring --dsync, --direct and --noatime, setting --reset to 1)" ));
166 :
167 0 : bstream_type = 2;
168 0 : reset = 1;
169 :
170 0 : mmio = fd_shmem_acquire( page_sz, page_cnt, near_cpu );
171 0 : TEST( mmio, "fd_shmem_acquire failed" );
172 0 : mmio_sz = page_sz*page_cnt;
173 :
174 0 : }
175 :
176 0 : TEST( fd_ulong_is_aligned( (ulong)_io, fd_vinyl_io_mm_align() ), "bad alloc" );
177 0 : TEST( io_footprint >= fd_vinyl_io_mm_footprint( spad_max ), "bad alloc" );
178 :
179 0 : io = fd_vinyl_io_mm_init( _io, spad_max, mmio, mmio_sz, reset, info, info_sz, io_seed );
180 0 : TEST( io, "fd_vinyl_io mm_init failed" );
181 :
182 0 : } else if( !strcmp( type, "bd" ) ) {
183 :
184 0 : TEST( path, "--path not specified for --type bd" );
185 :
186 0 : FD_LOG_NOTICE(( "Using --path as a block device bstream" ));
187 :
188 0 : bstream_type = 3;
189 :
190 0 : fd = open( path, open_flags, 0 );
191 0 : if( FD_UNLIKELY( fd==-1 ) ) FD_LOG_ERR(( "open failed (%i-%s)", errno, fd_io_strerror( errno ) ));
192 :
193 0 : TEST( fd_ulong_is_aligned( (ulong)_io, fd_vinyl_io_bd_align() ), "bad wksp alloc" );
194 0 : TEST( io_footprint >= fd_vinyl_io_bd_footprint( spad_max ), "bad wksp alloc" );
195 :
196 0 : io = fd_vinyl_io_bd_init( _io, spad_max, fd, reset, info, info_sz, io_seed );
197 0 : TEST( io, "fd_vinyl_io bd_init failed" );
198 :
199 0 : } else {
200 :
201 0 : FD_LOG_ERR(( "Unsupported io type" ));
202 :
203 0 : }
204 :
205 0 : FD_LOG_NOTICE(( "Creating vinyl" ));
206 :
207 0 : fd_tpool_t * tpool = NULL;
208 :
209 0 : ulong thread_cnt = fd_tile_cnt();
210 :
211 0 : if( thread_cnt>1UL ) {
212 0 : FD_LOG_NOTICE(( "Creating temporary tpool from all %lu tiles for thread parallel init", thread_cnt ));
213 :
214 0 : static uchar _tpool[ FD_TPOOL_FOOTPRINT( FD_TILE_MAX ) ] __attribute__((aligned(FD_TPOOL_ALIGN)));
215 :
216 0 : tpool = fd_tpool_init( _tpool, thread_cnt, 0UL ); /* logs details */
217 0 : if( FD_UNLIKELY( !tpool ) ) FD_LOG_ERR(( "fd_tpool_init failed" ));
218 :
219 0 : for( ulong thread_idx=1UL; thread_idx<thread_cnt; thread_idx++ )
220 0 : if( FD_UNLIKELY( !fd_tpool_worker_push( tpool, thread_idx ) ) ) FD_LOG_ERR(( "fd_tpool_worker_push failed" ));
221 0 : }
222 :
223 0 : fd_vinyl_t * vinyl = fd_vinyl_init( tpool, 0UL, thread_cnt, level,
224 0 : _vinyl,
225 0 : _cnc, cnc_footprint,
226 0 : _meta, meta_footprint,
227 0 : _line, line_footprint,
228 0 : _ele, ele_footprint,
229 0 : _obj, obj_footprint,
230 0 : io, seed, wksp, async_min, async_max, part_thresh, gc_thresh, gc_eager, style );
231 :
232 0 : TEST( vinyl, "fd_vinyl_init failed" );
233 :
234 0 : if( tpool ) {
235 0 : FD_LOG_NOTICE(( "Destroying temporary tpool" ));
236 0 : fd_tpool_fini( tpool );
237 0 : }
238 :
239 0 : # undef TEST
240 :
241 0 : FD_LOG_NOTICE(( "Running" ));
242 :
243 0 : fd_vinyl_exec( vinyl );
244 :
245 0 : FD_LOG_NOTICE(( "Cleaning up" ));
246 :
247 0 : fd_vinyl_fini( vinyl );
248 0 : fd_vinyl_io_fini( io );
249 :
250 0 : switch( bstream_type ) {
251 0 : case 0: fd_io_mmio_fini ( mmio, mmio_sz ); /* logs details */ break; /* mmio on a file */
252 0 : case 1: fd_shmem_leave ( mmio, NULL, 0UL ); /* logs details */ break; /* mmio on a named shmem region */
253 0 : case 2: fd_shmem_release( mmio, page_sz, page_cnt ); /* logs details */ break; /* mmio on a anon shmem region */
254 0 : default: break; /* block device or other */
255 0 : }
256 :
257 0 : if( FD_LIKELY( fd!=-1 ) && FD_UNLIKELY( close( fd ) ) )
258 0 : FD_LOG_WARNING(( "close failed (%i-%s); attempting to continue", errno, fd_io_strerror( errno ) ));
259 :
260 0 : fd_wksp_pod_unmap( _ele );
261 0 : fd_wksp_pod_unmap( _obj );
262 0 : fd_wksp_pod_unmap( _io );
263 0 : fd_wksp_pod_unmap( _line );
264 0 : fd_wksp_pod_unmap( _meta );
265 0 : fd_wksp_pod_unmap( _cnc );
266 0 : fd_wksp_pod_unmap( _vinyl );
267 :
268 0 : fd_wksp_pod_detach( pod );
269 :
270 0 : return 0;
271 0 : }
272 :
273 : int
274 : main( int argc,
275 : char ** argv ) {
276 : fd_boot( &argc, &argv );
277 :
278 : # define SHIFT(n) argv += (n), argc -= (n)
279 :
280 : if( FD_UNLIKELY( argc<1 ) ) FD_LOG_ERR(( "no arguments" ));
281 :
282 : char const * bin = argv[0];
283 : SHIFT(1);
284 :
285 : umask( (mode_t)0 ); /* So mode setting gets respected */
286 :
287 : /* We let advanced operators configure these. The defaults are
288 : reasonable safe values. E.g. a larger pod might be useful if an
289 : operator wants to stash their own config in the pod created by a
290 : vinyl instance (and then might want the specific vinyl config to be
291 : its own subpod of that pod). Or might want to stash additional
292 : info in the vinyl tile cnc_app region. Or might want a
293 : larger/smaller io append scratch pad to speed up performance/reduce
294 : memory footprint. Or to set the meta cache seed manually. Or use
295 : their own tagging conventions. Or ...
296 :
297 : For obj_footprint_avg, if we assume most objects are minimum sized,
298 : they will take up 2 blocks in the object store (1 for the header
299 : and 1 for the raw encoded pair). Such objects will be stored in a
300 : superblock with 64 other objects. So there is an 8 byte overhead
301 : for the superblock header. And the superblocks are recursively
302 : contained in a larger superblocks with a radix of ~12 which adds a
303 : little more to the overhead (less than 1 byte practically). */
304 :
305 : ulong wksp_tag = 0xfdc12113c597a600UL; /* FD VINYL WKSP TAG 00 */
306 : ulong pod_max = 4096UL;
307 : char const * cfg_path = NULL;
308 : ulong cnc_app_sz = FD_VINYL_CNC_APP_SZ;
309 : ulong spad_max = fd_vinyl_io_spad_est();
310 : ulong async_min = 2UL;
311 : ulong async_max = 4UL;
312 : ulong part_thresh = 1UL << 30; /* insert parallel recovery partitions every ~1 GiB */
313 : ulong gc_thresh = 8UL << 30; /* don't compact unless >~ 8 GiB used */
314 : int gc_eager = 2; /* target <~25% garbage items */
315 : int style = FD_VINYL_BSTREAM_CTL_STYLE_LZ4; /* enable data compression */
316 : int level = 1; /* do a hard reset by default */
317 : ulong obj_footprint_avg = 2UL*FD_VINYL_BSTREAM_BLOCK_SZ + 8UL + 1UL; /* see note above */
318 :
319 : int err = 0;
320 : int cnt = 0;
321 :
322 : while( argc ) {
323 : char const * cmd = argv[0];
324 : SHIFT(1);
325 :
326 : if( !strcmp( cmd, "help" ) ) {
327 :
328 : fflush( stdout ); fflush( stderr );
329 : fputs( fd_vinyl_ctl_help, stdout );
330 : fflush( stdout ); fflush( stderr );
331 :
332 : FD_LOG_NOTICE(( "%i: %s: success", cnt, cmd ));
333 :
334 : } else if( !strcmp( cmd, "set" ) ) {
335 :
336 : if( FD_UNLIKELY( argc<2 ) ) FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
337 :
338 : char const * key = argv[0];
339 : char const * val = argv[1];
340 :
341 : /**/ if( !strcmp( key, "wksp_tag" ) ) wksp_tag = fd_cstr_to_ulong ( val );
342 : else if( !strcmp( key, "pod_max" ) ) pod_max = fd_cstr_to_ulong ( val );
343 : else if( !strcmp( key, "cfg_path" ) ) cfg_path = val;
344 : else if( !strcmp( key, "cnc_app_sz" ) ) cnc_app_sz = fd_cstr_to_ulong ( val );
345 : else if( !strcmp( key, "spad_max" ) ) spad_max = fd_cstr_to_ulong ( val );
346 : else if( !strcmp( key, "async_min" ) ) async_min = fd_cstr_to_ulong ( val );
347 : else if( !strcmp( key, "async_max" ) ) async_max = fd_cstr_to_ulong ( val );
348 : else if( !strcmp( key, "part_thresh" ) ) part_thresh = fd_cstr_to_ulong ( val );
349 : else if( !strcmp( key, "gc_thresh" ) ) gc_thresh = fd_cstr_to_ulong ( val );
350 : else if( !strcmp( key, "gc_eager" ) ) gc_eager = fd_cstr_to_int ( val );
351 : else if( !strcmp( key, "style" ) ) style = fd_cstr_to_vinyl_bstream_ctl_style( val );
352 : else if( !strcmp( key, "level" ) ) level = fd_cstr_to_int ( val );
353 : else if( !strcmp( key, "obj_footprint_avg" ) ) obj_footprint_avg = fd_cstr_to_ulong ( val );
354 : else FD_LOG_ERR(( "%i: %s %s %s: unknown key", cnt, cmd, key, val));
355 :
356 : FD_LOG_NOTICE(( "%i: %s %s %s: success", cnt, cmd, key, val ));
357 : SHIFT(2);
358 :
359 : } else if( !strcmp( cmd, "alloc-memory" ) ) {
360 :
361 : if( FD_UNLIKELY( argc<5 ) ) FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
362 :
363 : char const * mem = argv[0];
364 : ulong page_cnt = fd_cstr_to_ulong ( argv[1] );
365 : ulong page_sz = fd_cstr_to_shmem_page_sz( argv[2] );
366 : char const * seq = argv[3];
367 : ulong mode = fd_cstr_to_ulong_octal ( argv[4] );
368 :
369 : if( FD_UNLIKELY( !page_cnt ) )
370 : FD_LOG_ERR(( "%i: %s %s %lu %s %s 0%03lo: bad page count\n\t"
371 : "Do %s help for help", cnt, cmd, mem, page_cnt, argv[2], seq, mode, bin ));
372 :
373 : if( FD_UNLIKELY( !page_sz ) )
374 : FD_LOG_ERR(( "%i: %s %s %lu %s %s 0%03lo: bad page size\n\t"
375 : "Do %s help for help", cnt, cmd, mem, page_cnt, argv[2], seq, mode, bin ));
376 :
377 : /* Partition the pages over the seq */
378 :
379 : ulong sub_page_cnt[ 512UL ];
380 : ulong sub_cpu_idx [ 512UL ];
381 : ulong sub_cnt = fd_cstr_to_ulong_seq( seq, sub_cpu_idx, 512UL );
382 :
383 : if( FD_UNLIKELY( !sub_cnt ) )
384 : FD_LOG_ERR(( "%i: %s %s %lu %s %s 0%03lo: empty or invalid cpu sequence\n\t"
385 : "Do %s help for help", cnt, cmd, mem, page_cnt, argv[2], seq, mode, bin ));
386 :
387 : if( FD_UNLIKELY( sub_cnt>512UL ) )
388 : FD_LOG_ERR(( "%i: %s %s %lu %s %s 0%03lo: sequence too long, increase limit in fd_vinyl_ctl.c\n\t"
389 : "Do %s help for help", cnt, cmd, mem, page_cnt, argv[2], seq, mode, bin ));
390 :
391 : /* TODO: consider striping instead of blocking */
392 :
393 : ulong sub_page_min = page_cnt / sub_cnt;
394 : ulong sub_page_rem = page_cnt % sub_cnt;
395 : for( ulong sub_idx=0UL; sub_idx<sub_cnt; sub_idx++ ) sub_page_cnt[ sub_idx ] = sub_page_min + (ulong)(sub_idx<sub_page_rem);
396 :
397 : /* Create the workspace */
398 :
399 : /* TODO: allow user to specify seed and/or part_max */
400 : int err = fd_wksp_new_named( mem, page_sz, sub_cnt, sub_page_cnt, sub_cpu_idx, mode, 0U, 0UL ); /* logs details */
401 : if( FD_UNLIKELY( err ) )
402 : FD_LOG_ERR(( "%i: %s %s %lu %s %s 0%03lo: fd_wksp_new_named failed (%i-%s)\n\t"
403 : "Do %s help for help", cnt, cmd, mem, page_cnt, argv[2], seq, mode, err, fd_wksp_strerror( err ), bin ));
404 :
405 : FD_LOG_NOTICE(( "%i: %s %s %lu %s %s 0%03lo: success", cnt, cmd, mem, page_cnt, argv[2], seq, mode ));
406 : SHIFT(5);
407 :
408 : } else if( !strcmp( cmd, "free-memory" ) ) {
409 :
410 : if( FD_UNLIKELY( argc<1 ) ) FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
411 :
412 : char const * mem = argv[0];
413 :
414 : int err = fd_wksp_delete_named( mem );
415 : if( FD_UNLIKELY( err ) )
416 : FD_LOG_ERR(( "%i: %s %s: fd_wksp_delete_named failed (%i-%s)\n\t"
417 : "Do %s help for help", cnt, cmd, mem, err, fd_wksp_strerror( err ), bin ));
418 :
419 : FD_LOG_NOTICE(( "%i: %s %s: success", cnt, cmd, mem ));
420 : SHIFT(1);
421 :
422 : } else if( !strcmp( cmd, "alloc-storage" ) ) {
423 :
424 : if( FD_UNLIKELY( argc<3 ) )
425 : FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
426 :
427 : char const * path = argv[0];
428 : ulong GiB_cnt = fd_cstr_to_ulong ( argv[1] );
429 : ulong mode = fd_cstr_to_ulong_octal( argv[2] );
430 :
431 : if( FD_UNLIKELY( (!GiB_cnt) | (GiB_cnt>(1UL<<32)) ) )
432 : FD_LOG_ERR(( "%i: %s %s %lu 0%03lo: bad number of gigabytes\n\t"
433 : "Do %s help for help", cnt, cmd, path, GiB_cnt, mode, bin ));
434 :
435 : ulong sz = GiB_cnt << 30;
436 :
437 : int fd = open( path, O_RDWR | O_CREAT | O_EXCL, (mode_t)mode );
438 : if( FD_UNLIKELY( fd==-1 ) )
439 : FD_LOG_ERR(( "%i: %s %s %lu 0%03lo: open failed (%i-%s)\n\tDo %s help for help",
440 : cnt, cmd, path, GiB_cnt, mode, errno, fd_io_strerror( errno ), bin ));
441 :
442 : int err = fd_io_truncate( fd, sz );
443 : if( FD_UNLIKELY( err ) )
444 : FD_LOG_ERR(( "%i: %s %s %lu 0%03lo: fd_io_truncate failed (%i-%s)\n\tDo %s help for help",
445 : cnt, cmd, path, GiB_cnt, mode, err, fd_io_strerror( err ), bin ));
446 :
447 : if( FD_UNLIKELY( close( fd ) ) )
448 : FD_LOG_WARNING(( "%i: %s %s %lu 0%03lo: close failed (%i-%s); attempting to continue",
449 : cnt, cmd, path, GiB_cnt, mode, errno, fd_io_strerror( errno ) ));
450 :
451 : FD_LOG_NOTICE(( "%i: %s %s %lu 0%03lo: success", cnt, cmd, path, GiB_cnt, mode ));
452 : SHIFT(3);
453 :
454 : } else if( !strcmp( cmd, "free-storage" ) ) {
455 :
456 : if( FD_UNLIKELY( argc<1 ) )
457 : FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
458 :
459 : char const * store = argv[0];
460 :
461 : if( FD_UNLIKELY( unlink( store ) ) )
462 : FD_LOG_ERR(( "%i: %s %s: unlink failed (%i-%s)\n\tDo %s help for help",
463 : cnt, cmd, store, errno, fd_io_strerror( errno ), bin ));
464 :
465 : FD_LOG_NOTICE(( "%i: %s %s: success", cnt, cmd, store ));
466 : SHIFT(1);
467 :
468 : } else if( !strcmp( cmd, "new" ) ) {
469 :
470 : if( FD_UNLIKELY( argc<3 ) )
471 : FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
472 :
473 : char const * mem = argv[0];
474 : ulong pair_max = fd_cstr_to_ulong( argv[1] );
475 : ulong GiB_max = fd_cstr_to_ulong( argv[2] );
476 :
477 : # define TEST( c, msg ) do { \
478 : if( FD_UNLIKELY( !(c) ) ) \
479 : FD_LOG_ERR(( "%i: %s %s %lu %lu: FAIL %s (%s)\n\tDo %s help for help", \
480 : cnt, cmd, mem, pair_max, GiB_max, #c, (msg), bin )); \
481 : } while(0)
482 :
483 : ulong ele_max = fd_ulong_pow2_up( pair_max + 1UL );
484 : ulong lock_cnt = fd_vinyl_meta_lock_cnt_est( ele_max );
485 : ulong probe_max = ele_max;
486 :
487 : TEST( (0UL<pair_max) & (pair_max<ele_max) & (ele_max<=(ULONG_MAX/sizeof(fd_vinyl_meta_ele_t))), "bad pair_max" );
488 :
489 : ulong mem_max = GiB_max << 30;
490 :
491 : ulong pod_align = fd_pod_align();
492 : ulong pod_footprint = fd_pod_footprint( pod_max );
493 : ulong vinyl_align = fd_vinyl_align();
494 : ulong vinyl_footprint = fd_vinyl_footprint();
495 : ulong cnc_align = fd_cnc_align();
496 : ulong cnc_footprint = fd_cnc_footprint( cnc_app_sz );
497 : ulong meta_align = fd_vinyl_meta_align();
498 : ulong meta_footprint = fd_vinyl_meta_footprint( ele_max, lock_cnt, probe_max );
499 : ulong io_align = fd_ulong_max( fd_vinyl_io_bd_align(), fd_vinyl_io_mm_align() );
500 : ulong io_footprint = fd_ulong_max( fd_vinyl_io_bd_footprint( spad_max ), fd_vinyl_io_mm_footprint( spad_max ) );
501 : ulong line_align = alignof(fd_vinyl_line_t);
502 : /* line footprint computed below */
503 : ulong ele_align = alignof(fd_vinyl_meta_ele_t);
504 : ulong ele_footprint = sizeof(fd_vinyl_meta_ele_t)*ele_max;
505 : ulong obj_align = alignof(fd_vinyl_data_obj_t);
506 : /* obj_footprint compted below */
507 :
508 : /* See note re io_align / io_footprint */
509 :
510 : TEST( pod_footprint, "bad pod_max" );
511 : TEST( cnc_footprint, "bad cnc_app_sz" );
512 : TEST( meta_footprint, "bad pair_max" );
513 : TEST( io_footprint, "bad spad_max" );
514 :
515 : ulong mem_req = pod_footprint + pod_align - 1UL
516 : + vinyl_footprint + vinyl_align - 1UL
517 : + cnc_footprint + cnc_align - 1UL
518 : + meta_footprint + meta_align - 1UL
519 : + io_footprint + io_align - 1UL
520 : + /* below */ line_align - 1UL
521 : + ele_footprint + ele_align - 1UL
522 : + /* below */ obj_align - 1UL; /* FIXME: USE SATURATING ADDS */
523 :
524 : TEST( mem_req<mem_max, "increase maximum GiB allowed and/or decrease pair_max / spad_max / pod_max / cnc_app_sz" );
525 :
526 : ulong line_max = (mem_max - mem_req) / (sizeof(fd_vinyl_line_t) + obj_footprint_avg);
527 :
528 : TEST( line_max>=3UL, "increase maximum GiB allowed and/or decrease pair_max / spad_max / pod_max / cnc_app_sz" );
529 :
530 : ulong line_footprint = sizeof(fd_vinyl_line_t)*line_max;
531 :
532 : mem_req += line_footprint;
533 :
534 : ulong obj_footprint = fd_ulong_align_dn( mem_max - mem_req, alignof(fd_vinyl_data_obj_t) );
535 :
536 : mem_req += obj_footprint;
537 :
538 : TEST( mem_req<=mem_max, "internal error" );
539 :
540 : /* Attach to the memory that will contain this vinyl instance */
541 :
542 : fd_wksp_t * wksp = fd_wksp_attach( mem );
543 : TEST( wksp, "fd_wksp_attach failed" );
544 :
545 : /* Allocate all the needed regions. Note that, even though the
546 : vinyl io tile state is neither shared nor persistent, we
547 : allocate it here so the vinyl tile itself doesn't have to
548 : allocate it (it is dynamically sized and rather large). Since
549 : we want the vinyl tile to be able to pick the type of io
550 : interface and bstream store at startup without creating a new
551 : vinyl instance, we allocated an upper bound for all supported
552 : io types above (they are all roughly the same size anyway).
553 :
554 : Alternatively, we could have the vinyl tile do this allocation
555 : at tile startup. But this would create some additional
556 : complexity: the vinyl tile would need an allocator (and then
557 : one potentially has allocations left over from previous runs
558 : that did not terminate cleanly).
559 :
560 : Similar considerations apply for the data cache state, vinyl
561 : tile state, lines and data objects.
562 :
563 : Note also that, though meta is shared and persistent,
564 : persistence should only be used for post mortem debugging (the
565 : meta cache is recreated from scratch on vinyl tile startup). */
566 :
567 : void * _pod = fd_wksp_alloc_laddr( wksp, pod_align, pod_footprint, wksp_tag );
568 : void * _vinyl = fd_wksp_alloc_laddr( wksp, vinyl_align, vinyl_footprint, wksp_tag );
569 : void * _cnc = fd_wksp_alloc_laddr( wksp, cnc_align, cnc_footprint, wksp_tag );
570 : void * _meta = fd_wksp_alloc_laddr( wksp, meta_align, meta_footprint, wksp_tag );
571 : void * _io = fd_wksp_alloc_laddr( wksp, io_align, io_footprint, wksp_tag );
572 : void * _line = fd_wksp_alloc_laddr( wksp, line_align, line_footprint, wksp_tag ); /* This is kinda big */
573 : void * _ele = fd_wksp_alloc_laddr( wksp, ele_align, ele_footprint, wksp_tag ); /* This is really big */
574 : void * _obj = fd_wksp_alloc_laddr( wksp, obj_align, obj_footprint, wksp_tag );
575 :
576 : /* Note: the bigger obj gets, the better the performance (until it
577 : is large enough pairs always fit in cache but that would dwarf
578 : ele). In typical use cases, this is probably smaller to
579 : comparable to ele (resulting in much cheaper hardware at
580 : comparable speeds for typical usage patterns but less robust
581 : performance for extreme usage patterns). */
582 :
583 : TEST( (!!_pod) & (!!_vinyl) & (!!_cnc) & (!!_io) & (!!_line) & (!!_ele) & (!!_obj),
584 : "fd_wksp_alloc_laddr failed (free unneeded allocs or increase wksp size or partitions)" );
585 :
586 : /* Format and the join the pod and create the cfg subpod as
587 : necessary. */
588 :
589 : uchar * pod = fd_pod_join( fd_pod_new( _pod, pod_max ) );
590 : TEST( pod, "internal error" );
591 :
592 : uchar * cfg;
593 : if( !cfg_path ) cfg = pod;
594 : else {
595 : ulong off = fd_pod_alloc_subpod( pod, cfg_path, 1024UL );
596 : TEST( off, "use shorter cfg_path or increase pod_max?" );
597 : cfg = pod + off;
598 : }
599 :
600 : /* Populate the pod */
601 :
602 : char tmp[ FD_WKSP_CSTR_MAX ];
603 :
604 : TEST( fd_pod_insert_cstr( cfg, "vinyl", fd_wksp_cstr_laddr( _vinyl, tmp ) ), "increase pod_max?" );
605 : TEST( fd_pod_insert_cstr( cfg, "cnc", fd_wksp_cstr_laddr( _cnc, tmp ) ), "increase pod_max?" );
606 : TEST( fd_pod_insert_cstr( cfg, "meta", fd_wksp_cstr_laddr( _meta, tmp ) ), "increase pod_max?" );
607 : TEST( fd_pod_insert_cstr( cfg, "io", fd_wksp_cstr_laddr( _io, tmp ) ), "increase pod_max?" );
608 : TEST( fd_pod_insert_cstr( cfg, "line", fd_wksp_cstr_laddr( _line, tmp ) ), "increase pod_max?" );
609 : TEST( fd_pod_insert_cstr( cfg, "ele", fd_wksp_cstr_laddr( _ele, tmp ) ), "increase pod_max?" );
610 : TEST( fd_pod_insert_cstr( cfg, "obj", fd_wksp_cstr_laddr( _obj, tmp ) ), "increase pod_max?" );
611 :
612 : TEST( fd_pod_insert_ulong( cfg, "vinyl_footprint", vinyl_footprint ), "increase pod_max?" );
613 : TEST( fd_pod_insert_ulong( cfg, "cnc_footprint", cnc_footprint ), "increase pod_max?" );
614 : TEST( fd_pod_insert_ulong( cfg, "meta_footprint", meta_footprint ), "increase pod_max?" );
615 : TEST( fd_pod_insert_ulong( cfg, "io_footprint", io_footprint ), "increase pod_max?" );
616 : TEST( fd_pod_insert_ulong( cfg, "line_footprint", line_footprint ), "increase pod_max?" );
617 : TEST( fd_pod_insert_ulong( cfg, "ele_footprint", ele_footprint ), "increase pod_max?" );
618 : TEST( fd_pod_insert_ulong( cfg, "obj_footprint", obj_footprint ), "increase pod_max?" );
619 :
620 : TEST( fd_pod_insert_ulong( cfg, "spad_max", spad_max ), "increase pod_max?" );
621 : TEST( fd_pod_insert_ulong( cfg, "pair_max", pair_max ), "increase pod_max?" );
622 : TEST( fd_pod_insert_ulong( cfg, "line_max", line_max ), "increase pod_max?" );
623 : TEST( fd_pod_insert_ulong( cfg, "async_min", async_min ), "increase pod_max?" );
624 : TEST( fd_pod_insert_ulong( cfg, "async_max", async_max ), "increase pod_max?" );
625 : TEST( fd_pod_insert_ulong( cfg, "part_thresh", part_thresh ), "increase pod_max?" );
626 : TEST( fd_pod_insert_ulong( cfg, "gc_thresh", gc_thresh ), "increase pod_max?" );
627 : TEST( fd_pod_insert_int ( cfg, "gc_eager", gc_eager ), "increase pod_max?" );
628 : TEST( fd_pod_insert_int ( cfg, "style", style ), "increase pod_max?" );
629 : TEST( fd_pod_insert_int ( cfg, "level", level ), "increase pod_max?" );
630 :
631 : /* Tell the operator where the pod is */
632 : /* FIXME: consider putting the config pod in a normal page named
633 : shmem region or a flat file instead? Probably easier to pass
634 : between applications than a wksp gaddr. */
635 :
636 : printf( "%s\n", fd_wksp_cstr_laddr( _pod, tmp ) );
637 :
638 : /* Clean up */
639 :
640 : if( cfg!=pod ) TEST( fd_pod_compact( cfg, 1 ), "internal error" );
641 :
642 : TEST( fd_pod_leave( pod )==_pod, "internal error" );
643 :
644 : TEST( !fd_wksp_detach( wksp ), "internal error" );
645 :
646 : # undef TEST
647 :
648 : FD_LOG_NOTICE(( "%i: %s %s %lu %lu: success", cnt, cmd, mem, pair_max, GiB_max ));
649 : SHIFT(3);
650 :
651 : } else if( !strcmp( cmd, "delete" ) ) {
652 :
653 : if( FD_UNLIKELY( argc<1 ) )
654 : FD_LOG_ERR(( "%i: %s: too few arguments\n\tDo %s help for help", cnt, cmd, bin ));
655 :
656 : char const * cstr = argv[0];
657 :
658 : # define TEST( c, msg ) do { \
659 : if( FD_UNLIKELY( !(c) ) ) \
660 : FD_LOG_ERR(( "%i: %s %s: FAIL %s (%s)\n\tDo %s help for help", \
661 : cnt, cmd, cstr, #c, (msg), bin )); \
662 : } while(0)
663 :
664 : uchar const * pod = fd_pod_join( fd_wksp_map( cstr ) ); /* logs details */
665 : TEST( pod, "unable to join pod" );
666 :
667 : uchar const * cfg;
668 : if( !cfg_path ) cfg = pod;
669 : else {
670 : cfg = fd_pod_query_subpod( pod, cfg_path );
671 : TEST( cfg, "cfg not found at cfg_path" );
672 : }
673 :
674 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "obj", NULL ) );
675 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "ele", NULL ) );
676 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "line", NULL ) );
677 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "io", NULL ) );
678 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "meta", NULL ) );
679 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "cnc", NULL ) );
680 : fd_wksp_cstr_free( fd_pod_query_cstr( cfg, "vinyl", NULL ) );
681 :
682 : fd_wksp_unmap( fd_pod_leave( pod ) );
683 :
684 : fd_wksp_cstr_free( cstr );
685 :
686 : FD_LOG_NOTICE(( "%i: %s %s: success", cnt, cmd, cstr ));
687 : SHIFT(1);
688 :
689 : } else if( !strcmp( cmd, "exec" ) ) {
690 :
691 : err = fd_vinyl_main( argc, argv );
692 : break;
693 :
694 : } else {
695 :
696 : FD_LOG_ERR(( "%i: %s: unknown command\n\t"
697 : "Do %s help for help", cnt, cmd, bin ));
698 :
699 : }
700 : cnt++;
701 : }
702 :
703 : if( FD_UNLIKELY( cnt<1 ) ) FD_LOG_NOTICE(( "processed %i commands\n\tDo %s help for help", cnt, bin ));
704 : else FD_LOG_NOTICE(( "processed %i commands", cnt ));
705 :
706 : # undef SHIFT
707 :
708 : fd_halt();
709 : return err;
710 : }
|