Line data Source code
1 : #define _GNU_SOURCE /* Enable GNU and POSIX extensions */
2 :
3 : #include "../tiles.h"
4 :
5 : #include "fd_archiver.h"
6 : #include <errno.h>
7 : #include <fcntl.h>
8 : #include <sys/mman.h>
9 : #include <sys/stat.h>
10 : #include <string.h>
11 : #include <unistd.h>
12 : #include <linux/unistd.h>
13 : #include <sys/socket.h>
14 : #include <linux/if_xdp.h>
15 : #include "generated/archiver_writer_seccomp.h"
16 :
/* The archiver writer tile consumes fragments from all of its archiver
   feeder input links and writes them to the archive file.  It stamps
   each fragment with the time elapsed since the previous fragment, so
   that there is a total global order across the packets.

   There should only ever be a single archiver writer tile. */
22 :
/* Tag passed to fd_alloc_new when creating this tile's allocator. */
#define FD_ARCHIVER_WRITER_ALLOC_TAG (3UL)
/* Size in bytes of the staging buffer each incoming fragment is copied into. */
#define FD_ARCHIVER_WRITER_FRAG_BUF_SZ (4UL*FD_SHRED_STORE_MTU) /* MTU for shred_storei in fd_firedancer.c */
/* Size in bytes of the buffer backing the archive output stream. */
#define FD_ARCHIVER_WRITER_OUT_BUF_SZ (FD_SHMEM_HUGE_PAGE_SZ) /* Flush to the file system every 2MB */
26 :
27 : struct fd_archiver_writer_stats {
28 : ulong net_shred_in_cnt;
29 : ulong net_repair_in_cnt;
30 : };
31 : typedef struct fd_archiver_writer_stats fd_archiver_writer_stats_t;
32 :
33 : typedef struct {
34 : fd_wksp_t * mem;
35 : ulong chunk0;
36 : ulong wmark;
37 : } fd_archiver_writer_in_ctx_t;
38 :
39 : struct fd_archiver_writer_tile_ctx {
40 : void * out_buf;
41 :
42 : fd_archiver_writer_in_ctx_t in[ 32 ];
43 :
44 : fd_archiver_writer_stats_t stats;
45 :
46 : ulong now;
47 : ulong last_packet_ns;
48 : double tick_per_ns;
49 :
50 : fd_io_buffered_ostream_t archive_ostream;
51 :
52 : uchar frag_buf[FD_ARCHIVER_WRITER_FRAG_BUF_SZ];
53 :
54 : fd_alloc_t * alloc;
55 : fd_valloc_t valloc;
56 : };
57 : typedef struct fd_archiver_writer_tile_ctx fd_archiver_writer_tile_ctx_t;
58 :
59 : FD_FN_CONST static inline ulong
60 0 : scratch_align( void ) {
61 0 : return 4096UL;
62 0 : }
63 :
64 : FD_FN_PURE static inline ulong
65 0 : loose_footprint( fd_topo_tile_t const * tile FD_PARAM_UNUSED ) {
66 0 : return 1UL * FD_SHMEM_GIGANTIC_PAGE_SZ;
67 0 : }
68 :
69 : static ulong
70 : populate_allowed_seccomp( fd_topo_t const * topo FD_PARAM_UNUSED,
71 : fd_topo_tile_t const * tile,
72 : ulong out_cnt,
73 0 : struct sock_filter * out ) {
74 0 : populate_sock_filter_policy_archiver_writer( out_cnt,
75 0 : out,
76 0 : (uint)fd_log_private_logfile_fd(),
77 0 : (uint)tile->archiver.archive_fd );
78 0 : return sock_filter_policy_archiver_writer_instr_cnt;
79 0 : }
80 :
81 : static ulong
82 : populate_allowed_fds( fd_topo_t const * topo,
83 : fd_topo_tile_t const * tile,
84 : ulong out_fds_cnt,
85 0 : int * out_fds ) {
86 0 : (void)topo;
87 0 : (void)out_fds_cnt;
88 :
89 0 : ulong out_cnt = 0UL;
90 :
91 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
92 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
93 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
94 0 : if( FD_LIKELY( -1!=tile->archiver.archive_fd ) )
95 0 : out_fds[ out_cnt++ ] = tile->archiver.archive_fd; /* archive file */
96 :
97 0 : return out_cnt;
98 0 : }
99 :
100 : FD_FN_PURE static inline ulong
101 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
102 0 : (void)tile;
103 0 : ulong l = FD_LAYOUT_INIT;
104 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
105 0 : return FD_LAYOUT_FINI( l, scratch_align() );
106 0 : }
107 :
108 : static void
109 : privileged_init( fd_topo_t * topo,
110 0 : fd_topo_tile_t * tile ) {
111 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
112 :
113 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
114 0 : fd_archiver_writer_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
115 0 : memset( ctx, 0, sizeof(fd_archiver_writer_tile_ctx_t) );
116 0 : FD_SCRATCH_ALLOC_APPEND( l, fd_alloc_align(), fd_alloc_footprint() );
117 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
118 :
119 0 : tile->archiver.archive_fd = open( tile->archiver.archiver_path, O_RDWR | O_CREAT | O_DIRECT, 0666 );
120 0 : if ( FD_UNLIKELY( tile->archiver.archive_fd == -1 ) ) {
121 0 : FD_LOG_ERR(( "failed to open or create archive file %s %d %d %s", tile->archiver.archiver_path, tile->archiver.archive_fd, errno, strerror(errno) ));
122 0 : }
123 0 : }
124 :
125 : static void
126 : unprivileged_init( fd_topo_t * topo,
127 0 : fd_topo_tile_t * tile ) {
128 :
129 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
130 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
131 0 : fd_archiver_writer_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
132 0 : void * alloc_shmem = FD_SCRATCH_ALLOC_APPEND( l, fd_alloc_align(), fd_alloc_footprint() );
133 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
134 :
135 : /* Setup the archive tile to be in the expected state */
136 0 : int err = ftruncate( tile->archiver.archive_fd, 0UL );
137 0 : if( FD_UNLIKELY( err==-1 ) ) {
138 0 : FD_LOG_ERR(( "failed to truncate the archive file (%i-%s)", errno, fd_io_strerror( errno ) ));
139 0 : }
140 :
141 0 : long seek = lseek( tile->archiver.archive_fd, 0UL, SEEK_SET );
142 0 : if( FD_UNLIKELY( seek!=0L ) ) {
143 0 : FD_LOG_ERR(( "failed to seek to the beginning of the archive file" ));
144 0 : }
145 :
146 : /* Input links */
147 0 : for( ulong i=0; i<tile->in_cnt; i++ ) {
148 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
149 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
150 :
151 0 : ctx->in[ i ].mem = link_wksp->wksp;
152 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
153 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
154 0 : }
155 :
156 : /* Allocator */
157 0 : ctx->alloc = fd_alloc_join( fd_alloc_new( alloc_shmem, FD_ARCHIVER_WRITER_ALLOC_TAG ), fd_tile_idx() );
158 0 : if( FD_UNLIKELY( !ctx->alloc ) ) {
159 0 : FD_LOG_ERR( ( "fd_alloc_join failed" ) );
160 0 : }
161 0 : ctx->valloc = fd_alloc_virtual( ctx->alloc );
162 :
163 : /* Allocate output buffer */
164 0 : ctx->out_buf = fd_valloc_malloc( ctx->valloc, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
165 0 : if( FD_UNLIKELY( !ctx->out_buf ) ) {
166 0 : FD_LOG_ERR(( "failed to allocate output buffer" ));
167 0 : }
168 :
169 : /* Initialize output stream */
170 0 : if( FD_UNLIKELY( !fd_io_buffered_ostream_init(
171 0 : &ctx->archive_ostream,
172 0 : tile->archiver.archive_fd,
173 0 : ctx->out_buf,
174 0 : FD_ARCHIVER_WRITER_OUT_BUF_SZ ) ) ) {
175 0 : FD_LOG_ERR(( "failed to initialize ostream" ));
176 0 : }
177 :
178 0 : ctx->tick_per_ns = fd_tempo_tick_per_ns( NULL );
179 0 : }
180 :
181 : static void
182 0 : during_housekeeping( fd_archiver_writer_tile_ctx_t * ctx ) {
183 0 : ctx->now =(ulong)((double)(fd_tickcount()) / ctx->tick_per_ns);
184 0 : }
185 :
186 : static inline void
187 : during_frag( fd_archiver_writer_tile_ctx_t * ctx,
188 : ulong in_idx,
189 : ulong seq FD_PARAM_UNUSED,
190 : ulong sig FD_PARAM_UNUSED,
191 : ulong chunk,
192 : ulong sz,
193 0 : ulong ctl FD_PARAM_UNUSED ) {
194 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz<FD_ARCHIVER_FRAG_HEADER_FOOTPRINT ) ) {
195 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
196 0 : }
197 :
198 : /* Write the incoming fragment to the ostream */
199 0 : char * src = (char *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
200 :
201 : /* Update the timestamp of the fragment, so that we have a total ordering */
202 0 : fd_archiver_frag_header_t * header = fd_type_pun( src );
203 0 : FD_TEST(( header->magic == FD_ARCHIVER_HEADER_MAGIC ));
204 :
205 : /* Set the relative delay on the packet */
206 0 : ulong now_ns = ctx->now;
207 0 : if( ctx->last_packet_ns == 0UL ) {
208 0 : header->ns_since_prev_fragment = 0L;
209 0 : } else {
210 0 : header->ns_since_prev_fragment = now_ns - ctx->last_packet_ns;
211 0 : }
212 0 : ctx->last_packet_ns = now_ns;
213 :
214 : /* Copy fragment into buffer */
215 0 : fd_memcpy( ctx->frag_buf, src, sz );
216 :
217 0 : ctx->stats.net_shred_in_cnt += header->tile_id == FD_ARCHIVER_TILE_ID_SHRED;
218 0 : ctx->stats.net_repair_in_cnt += header->tile_id == FD_ARCHIVER_TILE_ID_REPAIR;
219 0 : }
220 :
221 : static inline void
222 : after_frag( fd_archiver_writer_tile_ctx_t * ctx,
223 : ulong in_idx FD_PARAM_UNUSED,
224 : ulong seq FD_PARAM_UNUSED,
225 : ulong sig FD_PARAM_UNUSED,
226 : ulong sz,
227 : ulong tsorig FD_PARAM_UNUSED,
228 : ulong tspub FD_PARAM_UNUSED,
229 0 : fd_stem_context_t * stem FD_PARAM_UNUSED ) {
230 : /* Write frag to file */
231 0 : int err = fd_io_buffered_ostream_write( &ctx->archive_ostream, ctx->frag_buf, sz );
232 0 : if( FD_UNLIKELY( err != 0 ) ) {
233 0 : FD_LOG_WARNING(( "failed to write %lu bytes to output buffer. error: %d", sz, err ));
234 0 : }
235 0 : }
236 :
/* Parameters and callbacks defined ahead of including the stem source
   (fd_stem.c).  stem_run, used in the tile descriptor, is not defined
   in this file and presumably comes from that include. */
#define STEM_BURST (1UL)
#define STEM_LAZY (50UL)

/* Context type (and its alignment) handed to the callbacks */
#define STEM_CALLBACK_CONTEXT_TYPE fd_archiver_writer_tile_ctx_t
#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_archiver_writer_tile_ctx_t)

/* Callbacks implemented by this tile */
#define STEM_CALLBACK_DURING_FRAG during_frag
#define STEM_CALLBACK_AFTER_FRAG after_frag
#define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping

#include "../stem/fd_stem.c"
248 :
249 : fd_topo_run_tile_t fd_tile_archiver_writer = {
250 : .name = "arch_w",
251 : .loose_footprint = loose_footprint,
252 : .populate_allowed_seccomp = populate_allowed_seccomp,
253 : .populate_allowed_fds = populate_allowed_fds,
254 : .scratch_align = scratch_align,
255 : .scratch_footprint = scratch_footprint,
256 : .privileged_init = privileged_init,
257 : .unprivileged_init = unprivileged_init,
258 : .run = stem_run,
259 : };
|