Line data Source code
1 : #define _GNU_SOURCE /* Enable GNU and POSIX extensions */
2 :
3 : #include "../tiles.h"
4 :
5 : #include "fd_archiver.h"
6 : #include <errno.h>
7 : #include <fcntl.h>
8 : #include <sys/mman.h>
9 : #include <sys/stat.h>
10 : #include <string.h>
11 : #include <unistd.h>
12 : #include <sys/socket.h>
13 : #include "generated/archiver_writer_seccomp.h"
14 :
/* The archiver writer tile consumes fragments from all of its archiver
   feeder input links and writes them to the archive file.  It stamps
   each fragment with the time elapsed since the previous fragment, so
   that there is a total global order across the packets.

   There should only ever be a single archiver writer tile. */
20 :
/* Allocation tag constant (not referenced by the code visible in this
   file). */
#define FD_ARCHIVER_WRITER_ALLOC_TAG   (3UL)

/* Size in bytes of the per-fragment staging buffer (frag_buf in the
   tile context).  MTU for shred_storei in fd_firedancer.c */
#define FD_ARCHIVER_WRITER_FRAG_BUF_SZ (4UL*FD_SHRED_STORE_MTU)

/* Size in bytes of the buffer backing the archive output stream.
   Flush to the file system every 2MB */
#define FD_ARCHIVER_WRITER_OUT_BUF_SZ  (FD_SHMEM_HUGE_PAGE_SZ)
24 :
25 : struct fd_archiver_writer_stats {
26 : ulong net_shred_in_cnt;
27 : ulong net_repair_in_cnt;
28 : };
29 : typedef struct fd_archiver_writer_stats fd_archiver_writer_stats_t;
30 :
31 : typedef struct {
32 : fd_wksp_t * mem;
33 : ulong chunk0;
34 : ulong wmark;
35 : } fd_archiver_writer_in_ctx_t;
36 :
37 : struct fd_archiver_writer_tile_ctx {
38 : void * out_buf;
39 :
40 : fd_archiver_writer_in_ctx_t in[ 32 ];
41 :
42 : fd_archiver_writer_stats_t stats;
43 :
44 : ulong now;
45 : ulong last_packet_ns;
46 : double tick_per_ns;
47 :
48 : fd_io_buffered_ostream_t archive_ostream;
49 :
50 : uchar frag_buf[FD_ARCHIVER_WRITER_FRAG_BUF_SZ];
51 : };
52 : typedef struct fd_archiver_writer_tile_ctx fd_archiver_writer_tile_ctx_t;
53 :
54 : FD_FN_CONST static inline ulong
55 0 : scratch_align( void ) {
56 0 : return 4096UL;
57 0 : }
58 :
59 : FD_FN_PURE static inline ulong
60 0 : loose_footprint( fd_topo_tile_t const * tile FD_PARAM_UNUSED ) {
61 0 : return 1UL * FD_SHMEM_GIGANTIC_PAGE_SZ;
62 0 : }
63 :
64 : static ulong
65 : populate_allowed_seccomp( fd_topo_t const * topo FD_PARAM_UNUSED,
66 : fd_topo_tile_t const * tile,
67 : ulong out_cnt,
68 0 : struct sock_filter * out ) {
69 0 : populate_sock_filter_policy_archiver_writer( out_cnt,
70 0 : out,
71 0 : (uint)fd_log_private_logfile_fd(),
72 0 : (uint)tile->archiver.archive_fd );
73 0 : return sock_filter_policy_archiver_writer_instr_cnt;
74 0 : }
75 :
76 : static ulong
77 : populate_allowed_fds( fd_topo_t const * topo,
78 : fd_topo_tile_t const * tile,
79 : ulong out_fds_cnt,
80 0 : int * out_fds ) {
81 0 : (void)topo;
82 0 : (void)out_fds_cnt;
83 :
84 0 : ulong out_cnt = 0UL;
85 :
86 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
87 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
88 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
89 0 : if( FD_LIKELY( -1!=tile->archiver.archive_fd ) )
90 0 : out_fds[ out_cnt++ ] = tile->archiver.archive_fd; /* archive file */
91 :
92 0 : return out_cnt;
93 0 : }
94 :
95 : FD_FN_PURE static inline ulong
96 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
97 0 : (void)tile;
98 0 : ulong l = FD_LAYOUT_INIT;
99 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
100 0 : l = FD_LAYOUT_APPEND( l, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
101 0 : return FD_LAYOUT_FINI( l, scratch_align() );
102 0 : }
103 :
104 : static void
105 : privileged_init( fd_topo_t * topo,
106 0 : fd_topo_tile_t * tile ) {
107 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
108 :
109 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
110 0 : fd_archiver_writer_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
111 0 : memset( ctx, 0, sizeof(fd_archiver_writer_tile_ctx_t) );
112 0 : ctx->out_buf = FD_SCRATCH_ALLOC_APPEND( l, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
113 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
114 :
115 0 : tile->archiver.archive_fd = open( tile->archiver.rocksdb_path, O_RDWR | O_CREAT | O_DIRECT, 0666 );
116 0 : if ( FD_UNLIKELY( tile->archiver.archive_fd == -1 ) ) {
117 0 : FD_LOG_ERR(( "failed to open or create archive file %s %d %d %s", tile->archiver.rocksdb_path, tile->archiver.archive_fd, errno, strerror(errno) ));
118 0 : }
119 0 : }
120 :
121 : static void
122 : unprivileged_init( fd_topo_t * topo,
123 0 : fd_topo_tile_t * tile ) {
124 :
125 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
126 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
127 0 : fd_archiver_writer_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
128 0 : ctx->out_buf = FD_SCRATCH_ALLOC_APPEND( l, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
129 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
130 :
131 : /* Setup the archive tile to be in the expected state */
132 0 : int err = ftruncate( tile->archiver.archive_fd, 0UL );
133 0 : if( FD_UNLIKELY( err==-1 ) ) {
134 0 : FD_LOG_ERR(( "failed to truncate the archive file (%i-%s)", errno, fd_io_strerror( errno ) ));
135 0 : }
136 :
137 0 : long seek = lseek( tile->archiver.archive_fd, 0UL, SEEK_SET );
138 0 : if( FD_UNLIKELY( seek!=0L ) ) {
139 0 : FD_LOG_ERR(( "failed to seek to the beginning of the archive file" ));
140 0 : }
141 :
142 : /* Input links */
143 0 : for( ulong i=0; i<tile->in_cnt; i++ ) {
144 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
145 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
146 :
147 0 : ctx->in[ i ].mem = link_wksp->wksp;
148 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
149 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
150 0 : }
151 :
152 : /* Initialize output stream */
153 0 : if( FD_UNLIKELY( !fd_io_buffered_ostream_init(
154 0 : &ctx->archive_ostream,
155 0 : tile->archiver.archive_fd,
156 0 : ctx->out_buf,
157 0 : FD_ARCHIVER_WRITER_OUT_BUF_SZ ) ) ) {
158 0 : FD_LOG_ERR(( "failed to initialize ostream" ));
159 0 : }
160 :
161 0 : ctx->tick_per_ns = fd_tempo_tick_per_ns( NULL );
162 0 : }
163 :
164 : static void
165 0 : during_housekeeping( fd_archiver_writer_tile_ctx_t * ctx ) {
166 0 : ctx->now =(ulong)((double)(fd_tickcount()) / ctx->tick_per_ns);
167 0 : }
168 :
169 : static inline void
170 : during_frag( fd_archiver_writer_tile_ctx_t * ctx,
171 : ulong in_idx,
172 : ulong seq FD_PARAM_UNUSED,
173 : ulong sig FD_PARAM_UNUSED,
174 : ulong chunk,
175 : ulong sz,
176 0 : ulong ctl FD_PARAM_UNUSED ) {
177 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz<FD_ARCHIVER_FRAG_HEADER_FOOTPRINT ) ) {
178 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
179 0 : }
180 :
181 : /* Write the incoming fragment to the ostream */
182 0 : char * src = (char *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
183 :
184 : /* Update the timestamp of the fragment, so that we have a total ordering */
185 0 : fd_archiver_frag_header_t * header = fd_type_pun( src );
186 0 : FD_TEST(( header->magic == FD_ARCHIVER_HEADER_MAGIC ));
187 :
188 : /* Set the relative delay on the packet */
189 0 : ulong now_ns = ctx->now;
190 0 : if( ctx->last_packet_ns == 0UL ) {
191 0 : header->ns_since_prev_fragment = 0L;
192 0 : } else {
193 0 : header->ns_since_prev_fragment = now_ns - ctx->last_packet_ns;
194 0 : }
195 0 : ctx->last_packet_ns = now_ns;
196 :
197 : /* Copy fragment into buffer */
198 0 : fd_memcpy( ctx->frag_buf, src, sz );
199 :
200 0 : ctx->stats.net_shred_in_cnt += header->tile_id == FD_ARCHIVER_TILE_ID_SHRED;
201 0 : ctx->stats.net_repair_in_cnt += header->tile_id == FD_ARCHIVER_TILE_ID_REPAIR;
202 0 : }
203 :
204 : static inline void
205 : after_frag( fd_archiver_writer_tile_ctx_t * ctx,
206 : ulong in_idx FD_PARAM_UNUSED,
207 : ulong seq FD_PARAM_UNUSED,
208 : ulong sig FD_PARAM_UNUSED,
209 : ulong sz,
210 : ulong tsorig FD_PARAM_UNUSED,
211 : ulong tspub FD_PARAM_UNUSED,
212 0 : fd_stem_context_t * stem FD_PARAM_UNUSED ) {
213 : /* Write frag to file */
214 0 : int err = fd_io_buffered_ostream_write( &ctx->archive_ostream, ctx->frag_buf, sz );
215 0 : if( FD_UNLIKELY( err != 0 ) ) {
216 0 : FD_LOG_WARNING(( "failed to write %lu bytes to output buffer. error: %d", sz, err ));
217 0 : }
218 0 : }
219 :
220 0 : #define STEM_BURST (1UL)
221 0 : #define STEM_LAZY (50UL)
222 :
223 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_archiver_writer_tile_ctx_t
224 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_archiver_writer_tile_ctx_t)
225 :
226 0 : #define STEM_CALLBACK_DURING_FRAG during_frag
227 0 : #define STEM_CALLBACK_AFTER_FRAG after_frag
228 0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
229 :
230 : #include "../stem/fd_stem.c"
231 :
232 : fd_topo_run_tile_t fd_tile_archiver_writer = {
233 : .name = "arch_w",
234 : .loose_footprint = loose_footprint,
235 : .populate_allowed_seccomp = populate_allowed_seccomp,
236 : .populate_allowed_fds = populate_allowed_fds,
237 : .scratch_align = scratch_align,
238 : .scratch_footprint = scratch_footprint,
239 : .privileged_init = privileged_init,
240 : .unprivileged_init = unprivileged_init,
241 : .run = stem_run,
242 : };
|