Line data Source code
1 : #define _GNU_SOURCE /* Enable GNU and POSIX extensions */
2 :
3 : #include "../tiles.h"
4 :
5 : #include "fd_archiver.h"
6 : #include <errno.h>
7 : #include <fcntl.h>
8 : #include <sys/mman.h>
9 : #include <sys/stat.h>
10 : #include <string.h>
11 : #include <unistd.h>
12 : #include <linux/unistd.h>
13 : #include <sys/socket.h>
14 : #include <linux/if_xdp.h>
15 : #include "generated/archiver_writer_seccomp.h"
16 :
/* The archiver writer tile consumes fragments from all of its archiver
   feeder input links and writes them to the archive file.  It stamps each
   fragment with the time elapsed since the previous fragment, so that
   there is a total global order across the packets.

   There should only ever be a single archiver writer tile. */
22 :
/* Tag value for the archiver writer; not referenced in the visible
   portion of this file. */
#define FD_ARCHIVER_WRITER_ALLOC_TAG   (3UL)

/* Capacity of the per-fragment staging buffer (frag_buf): four times
   the MTU for shred_storei in fd_firedancer.c */
#define FD_ARCHIVER_WRITER_FRAG_BUF_SZ (4UL*FD_SHRED_STORE_MTU)

/* Capacity of the buffered output stream's write buffer.  Flush to the
   file system every 2MB */
#define FD_ARCHIVER_WRITER_OUT_BUF_SZ  (FD_SHMEM_HUGE_PAGE_SZ)
26 :
27 : struct fd_archiver_writer_stats {
28 : ulong net_shred_in_cnt;
29 : ulong net_repair_in_cnt;
30 : };
31 : typedef struct fd_archiver_writer_stats fd_archiver_writer_stats_t;
32 :
33 : typedef struct {
34 : fd_wksp_t * mem;
35 : ulong chunk0;
36 : ulong wmark;
37 : } fd_archiver_writer_in_ctx_t;
38 :
39 : struct fd_archiver_writer_tile_ctx {
40 : void * out_buf;
41 :
42 : fd_archiver_writer_in_ctx_t in[ 32 ];
43 :
44 : fd_archiver_writer_stats_t stats;
45 :
46 : ulong now;
47 : ulong last_packet_ns;
48 : double tick_per_ns;
49 :
50 : fd_io_buffered_ostream_t archive_ostream;
51 :
52 : uchar frag_buf[FD_ARCHIVER_WRITER_FRAG_BUF_SZ];
53 : };
54 : typedef struct fd_archiver_writer_tile_ctx fd_archiver_writer_tile_ctx_t;
55 :
56 : FD_FN_CONST static inline ulong
57 0 : scratch_align( void ) {
58 0 : return 4096UL;
59 0 : }
60 :
61 : FD_FN_PURE static inline ulong
62 0 : loose_footprint( fd_topo_tile_t const * tile FD_PARAM_UNUSED ) {
63 0 : return 1UL * FD_SHMEM_GIGANTIC_PAGE_SZ;
64 0 : }
65 :
66 : static ulong
67 : populate_allowed_seccomp( fd_topo_t const * topo FD_PARAM_UNUSED,
68 : fd_topo_tile_t const * tile,
69 : ulong out_cnt,
70 0 : struct sock_filter * out ) {
71 0 : populate_sock_filter_policy_archiver_writer( out_cnt,
72 0 : out,
73 0 : (uint)fd_log_private_logfile_fd(),
74 0 : (uint)tile->archiver.archive_fd );
75 0 : return sock_filter_policy_archiver_writer_instr_cnt;
76 0 : }
77 :
78 : static ulong
79 : populate_allowed_fds( fd_topo_t const * topo,
80 : fd_topo_tile_t const * tile,
81 : ulong out_fds_cnt,
82 0 : int * out_fds ) {
83 0 : (void)topo;
84 0 : (void)out_fds_cnt;
85 :
86 0 : ulong out_cnt = 0UL;
87 :
88 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
89 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
90 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
91 0 : if( FD_LIKELY( -1!=tile->archiver.archive_fd ) )
92 0 : out_fds[ out_cnt++ ] = tile->archiver.archive_fd; /* archive file */
93 :
94 0 : return out_cnt;
95 0 : }
96 :
97 : FD_FN_PURE static inline ulong
98 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
99 0 : (void)tile;
100 0 : ulong l = FD_LAYOUT_INIT;
101 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
102 0 : l = FD_LAYOUT_APPEND( l, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
103 0 : return FD_LAYOUT_FINI( l, scratch_align() );
104 0 : }
105 :
106 : static void
107 : privileged_init( fd_topo_t * topo,
108 0 : fd_topo_tile_t * tile ) {
109 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
110 :
111 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
112 0 : fd_archiver_writer_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
113 0 : memset( ctx, 0, sizeof(fd_archiver_writer_tile_ctx_t) );
114 0 : ctx->out_buf = FD_SCRATCH_ALLOC_APPEND( l, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
115 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
116 :
117 0 : tile->archiver.archive_fd = open( tile->archiver.rocksdb_path, O_RDWR | O_CREAT | O_DIRECT, 0666 );
118 0 : if ( FD_UNLIKELY( tile->archiver.archive_fd == -1 ) ) {
119 0 : FD_LOG_ERR(( "failed to open or create archive file %s %d %d %s", tile->archiver.rocksdb_path, tile->archiver.archive_fd, errno, strerror(errno) ));
120 0 : }
121 0 : }
122 :
123 : static void
124 : unprivileged_init( fd_topo_t * topo,
125 0 : fd_topo_tile_t * tile ) {
126 :
127 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
128 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
129 0 : fd_archiver_writer_tile_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_archiver_writer_tile_ctx_t), sizeof(fd_archiver_writer_tile_ctx_t) );
130 0 : ctx->out_buf = FD_SCRATCH_ALLOC_APPEND( l, 4096, FD_ARCHIVER_WRITER_OUT_BUF_SZ );
131 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
132 :
133 : /* Setup the archive tile to be in the expected state */
134 0 : int err = ftruncate( tile->archiver.archive_fd, 0UL );
135 0 : if( FD_UNLIKELY( err==-1 ) ) {
136 0 : FD_LOG_ERR(( "failed to truncate the archive file (%i-%s)", errno, fd_io_strerror( errno ) ));
137 0 : }
138 :
139 0 : long seek = lseek( tile->archiver.archive_fd, 0UL, SEEK_SET );
140 0 : if( FD_UNLIKELY( seek!=0L ) ) {
141 0 : FD_LOG_ERR(( "failed to seek to the beginning of the archive file" ));
142 0 : }
143 :
144 : /* Input links */
145 0 : for( ulong i=0; i<tile->in_cnt; i++ ) {
146 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
147 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
148 :
149 0 : ctx->in[ i ].mem = link_wksp->wksp;
150 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
151 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
152 0 : }
153 :
154 : /* Initialize output stream */
155 0 : if( FD_UNLIKELY( !fd_io_buffered_ostream_init(
156 0 : &ctx->archive_ostream,
157 0 : tile->archiver.archive_fd,
158 0 : ctx->out_buf,
159 0 : FD_ARCHIVER_WRITER_OUT_BUF_SZ ) ) ) {
160 0 : FD_LOG_ERR(( "failed to initialize ostream" ));
161 0 : }
162 :
163 0 : ctx->tick_per_ns = fd_tempo_tick_per_ns( NULL );
164 0 : }
165 :
166 : static void
167 0 : during_housekeeping( fd_archiver_writer_tile_ctx_t * ctx ) {
168 0 : ctx->now =(ulong)((double)(fd_tickcount()) / ctx->tick_per_ns);
169 0 : }
170 :
171 : static inline void
172 : during_frag( fd_archiver_writer_tile_ctx_t * ctx,
173 : ulong in_idx,
174 : ulong seq FD_PARAM_UNUSED,
175 : ulong sig FD_PARAM_UNUSED,
176 : ulong chunk,
177 : ulong sz,
178 0 : ulong ctl FD_PARAM_UNUSED ) {
179 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz<FD_ARCHIVER_FRAG_HEADER_FOOTPRINT ) ) {
180 0 : FD_LOG_ERR(( "chunk %lu %lu corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
181 0 : }
182 :
183 : /* Write the incoming fragment to the ostream */
184 0 : char * src = (char *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
185 :
186 : /* Update the timestamp of the fragment, so that we have a total ordering */
187 0 : fd_archiver_frag_header_t * header = fd_type_pun( src );
188 0 : FD_TEST(( header->magic == FD_ARCHIVER_HEADER_MAGIC ));
189 :
190 : /* Set the relative delay on the packet */
191 0 : ulong now_ns = ctx->now;
192 0 : if( ctx->last_packet_ns == 0UL ) {
193 0 : header->ns_since_prev_fragment = 0L;
194 0 : } else {
195 0 : header->ns_since_prev_fragment = now_ns - ctx->last_packet_ns;
196 0 : }
197 0 : ctx->last_packet_ns = now_ns;
198 :
199 : /* Copy fragment into buffer */
200 0 : fd_memcpy( ctx->frag_buf, src, sz );
201 :
202 0 : ctx->stats.net_shred_in_cnt += header->tile_id == FD_ARCHIVER_TILE_ID_SHRED;
203 0 : ctx->stats.net_repair_in_cnt += header->tile_id == FD_ARCHIVER_TILE_ID_REPAIR;
204 0 : }
205 :
206 : static inline void
207 : after_frag( fd_archiver_writer_tile_ctx_t * ctx,
208 : ulong in_idx FD_PARAM_UNUSED,
209 : ulong seq FD_PARAM_UNUSED,
210 : ulong sig FD_PARAM_UNUSED,
211 : ulong sz,
212 : ulong tsorig FD_PARAM_UNUSED,
213 : ulong tspub FD_PARAM_UNUSED,
214 0 : fd_stem_context_t * stem FD_PARAM_UNUSED ) {
215 : /* Write frag to file */
216 0 : int err = fd_io_buffered_ostream_write( &ctx->archive_ostream, ctx->frag_buf, sz );
217 0 : if( FD_UNLIKELY( err != 0 ) ) {
218 0 : FD_LOG_WARNING(( "failed to write %lu bytes to output buffer. error: %d", sz, err ));
219 0 : }
220 0 : }
221 :
222 0 : #define STEM_BURST (1UL)
223 0 : #define STEM_LAZY (50UL)
224 :
225 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_archiver_writer_tile_ctx_t
226 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_archiver_writer_tile_ctx_t)
227 :
228 0 : #define STEM_CALLBACK_DURING_FRAG during_frag
229 0 : #define STEM_CALLBACK_AFTER_FRAG after_frag
230 0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
231 :
232 : #include "../stem/fd_stem.c"
233 :
234 : fd_topo_run_tile_t fd_tile_archiver_writer = {
235 : .name = "arch_w",
236 : .loose_footprint = loose_footprint,
237 : .populate_allowed_seccomp = populate_allowed_seccomp,
238 : .populate_allowed_fds = populate_allowed_fds,
239 : .scratch_align = scratch_align,
240 : .scratch_footprint = scratch_footprint,
241 : .privileged_init = privileged_init,
242 : .unprivileged_init = unprivileged_init,
243 : .run = stem_run,
244 : };
|