Line data Source code
1 : #include "fd_snapshot_create.h"
2 : #include "../runtime/sysvar/fd_sysvar_epoch_schedule.h"
3 : #include "../../ballet/zstd/fd_zstd.h"
4 : #include "../runtime/fd_hashes.h"
5 : #include "../runtime/fd_runtime.h"
6 : #include "../runtime/fd_cost_tracker.h"
7 :
8 : #include <errno.h>
9 : #include <stdio.h>
10 : #include <stdlib.h>
11 : #include <sys/stat.h>
12 : #include <sys/types.h>
13 : #include <unistd.h>
14 : #include <zstd.h>
15 :
16 : static uchar padding[ FD_SNAPSHOT_ACC_ALIGN ] = {0};
17 : static fd_account_meta_t default_meta = { .magic = FD_ACCOUNT_META_MAGIC };
18 :
19 : static inline fd_account_meta_t *
20 0 : fd_snapshot_create_get_default_meta( ulong slot ) {
21 0 : default_meta.slot = slot;
22 0 : return &default_meta;
23 0 : }
24 :
25 : static inline void
26 : fd_snapshot_create_populate_acc_vecs( fd_snapshot_ctx_t * snapshot_ctx,
27 : fd_solana_manifest_t * manifest,
28 : fd_tar_writer_t * writer,
29 0 : ulong * out_cap ) {
30 :
31 : /* The append vecs need to be described in an index in the manifest so a
32 : reader knows what account files to look for. These files are technically
33 : slot indexed, but the Firedancer implementation of the Solana snapshot
34 : produces far fewer indices. These storages are for the accounts
35 : that were modified and deleted in the most recent slot because that
36 : information is used by the Agave client to calculate and verify the
37 : bank hash for the given slot. This is done as an optimization to avoid
38 : having to slot index the Firedancer accounts db which would incur a large
39 : performance hit.
40 :
41 : To avoid iterating through the root twice to determine what accounts were
42 : touched in the snapshot slot and what accounts were touched in the
43 : other slots, we will create an array of pubkey pointers for all accounts
44 : that were touched in the snapshot slot. This buffer can be safely sized to
45 : the maximum amount of writable accounts that are possible in a non-epoch
46 : boundary slot. The rationale for this bound is explained in fd_runtime.h.
47 : We will not attempt to create a snapshot on an epoch boundary.
48 :
49 : TODO: We must add compaction here. */
50 :
51 0 : fd_pubkey_t * * snapshot_slot_keys = fd_spad_alloc( snapshot_ctx->spad, alignof(fd_pubkey_t*), sizeof(fd_pubkey_t*) * FD_WRITABLE_ACCS_IN_SLOT );
52 0 : ulong snapshot_slot_key_cnt = 0UL;
53 :
54 : /* We will dynamically resize the number of incremental keys because the upper
55 : bound will be roughly 8 bytes * writable accs in a slot * number of slots
56 : since the last full snapshot which can quickly grow to be severalgigabytes
57 : or more. In the normal case, this won't require dynamic resizing. */
58 0 : #define FD_INCREMENTAL_KEY_INIT_BOUND (100000UL)
59 0 : ulong incremental_key_bound = FD_INCREMENTAL_KEY_INIT_BOUND;
60 0 : ulong incremental_key_cnt = 0UL;
61 0 : fd_funk_rec_key_t const * * incremental_keys = snapshot_ctx->is_incremental ?
62 0 : fd_spad_alloc( snapshot_ctx->spad, alignof(fd_funk_rec_key_t*), sizeof(fd_funk_rec_key_t*) * incremental_key_bound ) :
63 0 : NULL;
64 :
65 0 : #undef FD_INCREMENTAL_KEY_INIT_BOUND
66 :
67 : /* In order to size out the accounts DB index in the manifest, we must
68 : iterate through funk and accumulate the size of all of the records
69 : from all slots before the snapshot_slot. */
70 :
71 0 : fd_funk_t * funk = snapshot_ctx->funk;
72 0 : ulong prev_sz = 0UL;
73 0 : ulong tombstones_cnt = 0UL;
74 0 : for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {
75 :
76 0 : if( !fd_funk_key_is_acc( rec->pair.key ) ) {
77 0 : continue;
78 0 : }
79 :
80 0 : tombstones_cnt++;
81 :
82 0 : int is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
83 0 : uchar const * raw = fd_funk_val( rec, fd_funk_wksp( funk ) );
84 0 : fd_account_meta_t * metadata = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
85 0 : (fd_account_meta_t*)raw;
86 :
87 0 : if( !metadata ) {
88 0 : continue;
89 0 : }
90 :
91 0 : if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
92 0 : continue;
93 0 : }
94 :
95 0 : if( snapshot_ctx->is_incremental ) {
96 : /* We only care about accounts that were modified since the last
97 : snapshot slot for incremental snapshots.
98 :
99 : We also need to keep track of the capitalization for all of the
100 : accounts that are in the incremental as this is verified. */
101 0 : if( metadata->slot<=snapshot_ctx->last_snap_slot ) {
102 0 : continue;
103 0 : }
104 0 : incremental_keys[ incremental_key_cnt++ ] = rec->pair.key;
105 0 : *out_cap += metadata->info.lamports;
106 :
107 0 : if( FD_UNLIKELY( incremental_key_cnt==incremental_key_bound ) ) {
108 : /* Dynamically resize if needed. */
109 0 : incremental_key_bound *= 2UL;
110 0 : fd_funk_rec_key_t const * * new_incremental_keys = fd_spad_alloc( snapshot_ctx->spad,
111 0 : alignof(fd_funk_rec_key_t*),
112 0 : sizeof(fd_funk_rec_key_t*) * incremental_key_bound );
113 0 : fd_memcpy( new_incremental_keys, incremental_keys, sizeof(fd_funk_rec_key_t*) * incremental_key_cnt );
114 0 : fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), incremental_keys );
115 0 : incremental_keys = new_incremental_keys;
116 0 : }
117 0 : }
118 :
119 : /* We know that all of the accounts from the snapshot slot can fit into
120 : one append vec, so we ignore all accounts from the snapshot slot. */
121 :
122 0 : if( metadata->slot==snapshot_ctx->slot ) {
123 0 : continue;
124 0 : }
125 :
126 0 : prev_sz += metadata->dlen + sizeof(fd_solana_account_hdr_t);
127 :
128 0 : }
129 :
130 : /* At this point we have sized out all of the relevant accounts that will
131 : be included in the snapshot. Now we must populate each of the append vecs
132 : and update the index as we go.
133 :
134 : When we account for the number of slots we need to consider one append vec
135 : for the snapshot slot and try to maximally fill up the others: an append
136 : vec has a protocol-defined maximum size in Agave. */
137 :
138 0 : ulong num_slots = 1UL + prev_sz / FD_SNAPSHOT_APPEND_VEC_SZ_MAX +
139 0 : (prev_sz % FD_SNAPSHOT_APPEND_VEC_SZ_MAX ? 1UL : 0UL);
140 :
141 0 : fd_solana_accounts_db_fields_t * accounts_db = &manifest->accounts_db;
142 :
143 0 : accounts_db->storages_len = num_slots;
144 0 : accounts_db->storages = fd_spad_alloc( snapshot_ctx->spad,
145 0 : FD_SNAPSHOT_SLOT_ACC_VECS_ALIGN,
146 0 : sizeof(fd_snapshot_slot_acc_vecs_t) * accounts_db->storages_len );
147 0 : accounts_db->version = 1UL;
148 0 : accounts_db->slot = snapshot_ctx->slot;
149 0 : accounts_db->historical_roots_len = 0UL;
150 0 : accounts_db->historical_roots = NULL;
151 0 : accounts_db->historical_roots_with_hash_len = 0UL;
152 0 : accounts_db->historical_roots_with_hash = NULL;
153 :
154 0 : for( ulong i=0UL; i<num_slots; i++ ) {
155 : /* Populate the storages for each slot. As a note, the slot number only
156 : matters for the snapshot slot. The other slot numbers don't affect
157 : consensus at all. Agave also maintains an invariant that there can
158 : only be one account vec per storage. */
159 :
160 0 : accounts_db->storages[ i ].account_vecs_len = 1UL;
161 0 : accounts_db->storages[ i ].account_vecs = fd_spad_alloc( snapshot_ctx->spad,
162 0 : FD_SNAPSHOT_ACC_VEC_ALIGN,
163 0 : sizeof(fd_snapshot_acc_vec_t) * accounts_db->storages[ i ].account_vecs_len );
164 0 : accounts_db->storages[ i ].account_vecs[ 0 ].file_sz = 0UL;
165 0 : accounts_db->storages[ i ].account_vecs[ 0 ].id = i + 1UL;
166 0 : accounts_db->storages[ i ].slot = snapshot_ctx->slot - i;
167 0 : }
168 :
169 : /* At this point we have iterated through all of the accounts and created
170 : the index. We are now ready to generate a snapshot hash. For both
171 : snapshots we need to generate two hashes:
172 : 1. The accounts hash. This is a simple hash of all of the accounts
173 : included in the snapshot.
174 : 2. The snapshot hash. This is a hash of the accounts hash and the epoch
175 : account hash. If the EAH is not included, then the accounts hash ==
176 : snapshot hash.
177 :
178 : There is some nuance as to which hash goes where. For full snapshots,
179 : the accounts hash in the bank hash info is the accounts hash. The hash in
180 : the filename is the snapshot hash.
181 :
182 : For incremental snapshots, the account hash in the bank hash info field is
183 : left zeroed out. The full snapshot's hash is in the incremental persistence
184 : field. The incremental snapshot's accounts hash is included in the
185 : incremental persistence field. The hash in the filename is the snapshot
186 : hash. */
187 :
188 0 : int err;
189 0 : if( !snapshot_ctx->is_incremental ) {
190 :
191 0 : err = fd_snapshot_service_hash( &snapshot_ctx->acc_hash,
192 0 : &snapshot_ctx->snap_hash,
193 0 : &snapshot_ctx->slot_bank,
194 0 : &snapshot_ctx->epoch_bank,
195 0 : snapshot_ctx->funk,
196 0 : snapshot_ctx->tpool,
197 0 : snapshot_ctx->spad,
198 0 : snapshot_ctx->features );
199 0 : accounts_db->bank_hash_info.accounts_hash = snapshot_ctx->acc_hash;
200 0 : } else {
201 0 : err = fd_snapshot_service_inc_hash( &snapshot_ctx->acc_hash,
202 0 : &snapshot_ctx->snap_hash,
203 0 : &snapshot_ctx->slot_bank,
204 0 : &snapshot_ctx->epoch_bank,
205 0 : snapshot_ctx->funk,
206 0 : incremental_keys,
207 0 : incremental_key_cnt,
208 0 : snapshot_ctx->spad,
209 0 : snapshot_ctx->features );
210 0 : fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), incremental_keys );
211 :
212 0 : fd_memset( &accounts_db->bank_hash_info.accounts_hash, 0, sizeof(fd_hash_t) );
213 0 : }
214 :
215 0 : FD_LOG_NOTICE(( "Hashes calculated acc_hash=%s snapshot_hash=%s",
216 0 : FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->acc_hash),
217 0 : FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) ));
218 :
219 0 : if( FD_UNLIKELY( err ) ) {
220 0 : FD_LOG_ERR(( "Unable to calculate snapshot hash" ));
221 0 : }
222 :
223 0 : fd_memset( &accounts_db->bank_hash_info.stats, 0, sizeof(fd_bank_hash_stats_t) );
224 :
225 : /* Now, we have calculated the relevant hashes for the accounts.
226 : Because the files are serially written out for tar and we need to prepend
227 : the manifest, we must reserve space in the archive for the solana manifest. */
228 :
229 0 : if( snapshot_ctx->is_incremental ) {
230 0 : manifest->bank_incremental_snapshot_persistence = fd_spad_alloc( snapshot_ctx->spad,
231 0 : FD_BANK_INCREMENTAL_SNAPSHOT_PERSISTENCE_ALIGN,
232 0 : sizeof(fd_bank_incremental_snapshot_persistence_t) );
233 0 : }
234 :
235 0 : ulong manifest_sz = fd_solana_manifest_size( manifest );
236 :
237 0 : char buffer[ FD_SNAPSHOT_DIR_MAX ];
238 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "snapshots/%lu/%lu", snapshot_ctx->slot, snapshot_ctx->slot );
239 0 : if( FD_UNLIKELY( err<0 ) ) {
240 0 : FD_LOG_ERR(( "Unable to format manifest name string" ));
241 0 : }
242 :
243 0 : err = fd_tar_writer_new_file( writer, buffer );
244 0 : if( FD_UNLIKELY( err ) ) {
245 0 : FD_LOG_ERR(( "Unable to create snapshot manifest file" ));
246 0 : }
247 :
248 : /* TODO: We want to eliminate having to write back into the tar file. This
249 : will enable the snapshot service to only use one file per snapshot.
250 : In order to do this, we must precompute the index in the manifest
251 : completely. This will allow us to stream out a compressed snapshot. */
252 :
253 0 : err = fd_tar_writer_make_space( writer, manifest_sz );
254 0 : if( FD_UNLIKELY( err ) ) {
255 0 : FD_LOG_ERR(( "Unable to make space for snapshot manifest file" ));
256 0 : }
257 :
258 0 : err = fd_tar_writer_fini_file( writer );
259 0 : if( FD_UNLIKELY( err ) ) {
260 0 : FD_LOG_ERR(( "Unable to finalize snapshot manifest file" ));
261 0 : }
262 :
263 : /* We have made space for the manifest and are ready to append the append
264 : vec files directly into the tar archive. We will iterate through all of
265 : the records in the funk root and create/populate an append vec for
266 : previous slots. Just record the pubkeys for the latest slot to populate
267 : the append vec after. If the append vec is full, write into the next one. */
268 :
269 0 : ulong curr_slot = 1UL;
270 0 : fd_snapshot_acc_vec_t * prev_accs = &accounts_db->storages[ curr_slot ].account_vecs[ 0UL ];
271 :
272 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
273 0 : if( FD_UNLIKELY( err<0 ) ) {
274 0 : FD_LOG_ERR(( "Unable to format previous accounts name string" ));
275 0 : }
276 :
277 0 : err = fd_tar_writer_new_file( writer, buffer );
278 0 : if( FD_UNLIKELY( err ) ) {
279 0 : FD_LOG_ERR(( "Unable to create previous accounts file" ));
280 0 : }
281 :
282 0 : fd_funk_rec_t * * tombstones = snapshot_ctx->is_incremental ? NULL :
283 0 : fd_spad_alloc( snapshot_ctx->spad, alignof(fd_funk_rec_t*), sizeof(fd_funk_rec_t*) * tombstones_cnt );
284 0 : tombstones_cnt = 0UL;
285 :
286 0 : for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {
287 :
288 : /* Get the account data. */
289 :
290 0 : if( !fd_funk_key_is_acc( rec->pair.key ) ) {
291 0 : continue;
292 0 : }
293 :
294 0 : fd_pubkey_t const * pubkey = fd_type_pun_const( rec->pair.key[0].uc );
295 0 : int is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
296 0 : uchar const * raw = fd_funk_val( rec, fd_funk_wksp( funk ) );
297 0 : fd_account_meta_t * metadata = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
298 0 : (fd_account_meta_t*)raw;
299 :
300 0 : if( !snapshot_ctx->is_incremental && is_tombstone ) {
301 : /* If we are in a full snapshot, we need to gather all of the accounts
302 : that we plan on deleting. */
303 0 : tombstones[ tombstones_cnt++ ] = (fd_funk_rec_t*)rec;
304 0 : }
305 :
306 0 : if( !metadata ) {
307 0 : continue;
308 0 : }
309 :
310 0 : if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
311 0 : continue;
312 0 : }
313 :
314 : /* Don't iterate through accounts that were touched before the last full
315 : snapshot. */
316 0 : if( snapshot_ctx->is_incremental && metadata->slot<=snapshot_ctx->last_snap_slot ) {
317 0 : continue;
318 0 : }
319 :
320 0 : uchar const * acc_data = raw + metadata->hlen;
321 :
322 : /* All accounts that were touched in the snapshot slot should be in
323 : a different append vec so that Agave can calculate the snapshot slot's
324 : bank hash. We don't want to include them in an arbitrary append vec. */
325 :
326 0 : if( metadata->slot==snapshot_ctx->slot ) {
327 0 : snapshot_slot_keys[ snapshot_slot_key_cnt++ ] = (fd_pubkey_t*)pubkey;
328 0 : continue;
329 0 : }
330 :
331 : /* We don't want to iterate over tombstones if the snapshot is not
332 : incremental */
333 0 : if( !snapshot_ctx->is_incremental && is_tombstone ) {
334 0 : continue;
335 0 : }
336 :
337 0 : ulong new_sz = prev_accs->file_sz + sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
338 :
339 0 : if( new_sz>FD_SNAPSHOT_APPEND_VEC_SZ_MAX ) {
340 :
341 : /* When the current append vec is full, finish writing it, start writing
342 : into the next append vec. */
343 :
344 0 : err = fd_tar_writer_fini_file( writer );
345 0 : if( FD_UNLIKELY( err ) ) {
346 0 : FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
347 0 : }
348 :
349 0 : prev_accs = &accounts_db->storages[ ++curr_slot ].account_vecs[ 0UL ];
350 :
351 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
352 0 : if( FD_UNLIKELY( err<0 ) ) {
353 0 : FD_LOG_ERR(( "Unable to format previous accounts name string" ));
354 0 : }
355 :
356 0 : err = fd_tar_writer_new_file( writer, buffer );
357 0 : if( FD_UNLIKELY( err ) ) {
358 0 : FD_LOG_ERR(( "Unable to create previous accounts file" ));
359 0 : }
360 0 : }
361 :
362 0 : prev_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
363 :
364 :
365 : /* Write out the header. */
366 :
367 0 : fd_solana_account_hdr_t header = {0};
368 : /* Stored meta */
369 0 : header.meta.write_version_obsolete = 0UL;
370 0 : header.meta.data_len = metadata->dlen;
371 0 : fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
372 : /* Account Meta */
373 0 : header.info.lamports = metadata->info.lamports;
374 0 : header.info.rent_epoch = header.info.lamports ? metadata->info.rent_epoch : 0UL;
375 0 : fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
376 0 : header.info.executable = metadata->info.executable;
377 : /* Hash */
378 0 : fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );
379 :
380 0 : err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
381 0 : if( FD_UNLIKELY( err ) ) {
382 0 : FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
383 0 : }
384 :
385 : /* Write out the file data. */
386 :
387 0 : err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
388 0 : if( FD_UNLIKELY( err ) ) {
389 0 : FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
390 0 : }
391 :
392 0 : ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
393 0 : err = fd_tar_writer_write_file_data( writer, padding, align_sz );
394 0 : if( FD_UNLIKELY( err ) ) {
395 0 : FD_LOG_ERR( ("Unable to stream out account padding to tar archive" ));
396 0 : }
397 0 : }
398 :
399 0 : err = fd_tar_writer_fini_file( writer );
400 0 : if( FD_UNLIKELY( err ) ) {
401 0 : FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
402 0 : }
403 :
404 : /* Now write out the append vec for the snapshot slot. Again, this is needed
405 : because the snapshot slot's accounts must be in their append vec in order
406 : to verify the bank hash for the snapshot slot in the Agave client. */
407 :
408 0 : fd_snapshot_acc_vec_t * curr_accs = &accounts_db->storages[ 0UL ].account_vecs[ 0UL ];
409 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot, curr_accs->id );
410 0 : if( FD_UNLIKELY( err<0 ) ) {
411 0 : FD_LOG_ERR(( "Unable to format current accounts name string" ));
412 0 : }
413 :
414 0 : err = fd_tar_writer_new_file( writer, buffer );
415 0 : if( FD_UNLIKELY( err ) ) {
416 0 : FD_LOG_ERR(( "Unable to create current accounts file" ));
417 0 : }
418 :
419 0 : for( ulong i=0UL; i<snapshot_slot_key_cnt; i++ ) {
420 :
421 0 : fd_pubkey_t const * pubkey = snapshot_slot_keys[i];
422 0 : fd_funk_rec_key_t key = fd_funk_acc_key( pubkey );
423 :
424 0 : fd_funk_rec_query_t query[1];
425 0 : fd_funk_rec_t const * rec = fd_funk_rec_query_try( funk, NULL, &key, query );
426 0 : if( FD_UNLIKELY( !rec ) ) {
427 0 : FD_LOG_ERR(( "Previously found record can no longer be found" ));
428 0 : }
429 :
430 0 : int is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
431 0 : uchar const * raw = fd_funk_val( rec, fd_funk_wksp( funk ) );
432 0 : fd_account_meta_t * metadata = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
433 0 : (fd_account_meta_t*)raw;
434 :
435 0 : if( FD_UNLIKELY( !metadata ) ) {
436 0 : FD_LOG_ERR(( "Record should have non-NULL metadata" ));
437 0 : }
438 :
439 0 : if( FD_UNLIKELY( metadata->magic!=FD_ACCOUNT_META_MAGIC ) ) {
440 0 : FD_LOG_ERR(( "Record should have valid magic" ));
441 0 : }
442 :
443 0 : uchar const * acc_data = raw + metadata->hlen;
444 :
445 0 : curr_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
446 :
447 : /* Write out the header. */
448 0 : fd_solana_account_hdr_t header = {0};
449 : /* Stored meta */
450 0 : header.meta.write_version_obsolete = 0UL;
451 0 : header.meta.data_len = metadata->dlen;
452 0 : fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
453 : /* Account Meta */
454 0 : header.info.lamports = metadata->info.lamports;
455 0 : header.info.rent_epoch = header.info.lamports ? metadata->info.rent_epoch : 0UL;
456 0 : fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
457 0 : header.info.executable = metadata->info.executable;
458 : /* Hash */
459 0 : fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );
460 :
461 :
462 0 : err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
463 0 : if( FD_UNLIKELY( err ) ) {
464 0 : FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
465 0 : }
466 0 : err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
467 0 : if( FD_UNLIKELY( err ) ) {
468 0 : FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
469 0 : }
470 0 : ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
471 0 : err = fd_tar_writer_write_file_data( writer, padding, align_sz );
472 0 : if( FD_UNLIKELY( err ) ) {
473 0 : FD_LOG_ERR(( "Unable to stream out account padding to tar archive" ));
474 0 : }
475 :
476 0 : FD_TEST( !fd_funk_rec_query_test( query ) );
477 0 : }
478 :
479 0 : err = fd_tar_writer_fini_file( writer );
480 0 : if( FD_UNLIKELY( err ) ) {
481 0 : FD_LOG_ERR(( "Unable to finish writing out file" ));
482 0 : }
483 :
484 : /* TODO: At this point we must implement compaction to the snapshot service.
485 : Without this, we are actually not cleaning up any tombstones from funk. */
486 :
487 0 : if( snapshot_ctx->is_incremental ) {
488 0 : err = fd_funk_rec_forget( funk, tombstones, tombstones_cnt );
489 0 : if( FD_UNLIKELY( err!=FD_FUNK_SUCCESS ) ) {
490 0 : FD_LOG_ERR(( "Unable to forget tombstones" ));
491 0 : }
492 0 : FD_LOG_NOTICE(( "Compacted %lu tombstone records", tombstones_cnt ));
493 0 : }
494 :
495 0 : fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), snapshot_slot_keys );
496 0 : fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), tombstones );
497 :
498 0 : }
499 :
/* Rebuild the runtime's stakes cache (old_stakes) into a form
   (new_stakes) that can be reserialized into the Agave-compatible
   snapshot format.  Vote account values are re-read from funk so the
   raw account bytes match the accounts db, and stale stake delegations
   are evicted or refreshed in place.  NOTE: this MUTATES old_stakes
   (delegation entries may be removed/updated) and new_stakes shares
   old_stakes' delegation pool/root afterwards.  All allocations come
   from snapshot_ctx->spad; any failure terminates via FD_LOG_ERR. */
static void
fd_snapshot_create_serialiable_stakes( fd_snapshot_ctx_t *    snapshot_ctx,
                                       fd_stakes_delegation_t * old_stakes,
                                       fd_stakes_delegation_t * new_stakes ) {

  /* The deserialized stakes cache that is used by the runtime can't be
     reserialized into the format that Agave uses. For every vote account
     in the stakes struct, the Firedancer client holds a decoded copy of the
     vote state. However, this vote state can't be reserialized back into the
     full vote account data.

     This poses a problem in the Agave client client because upon boot, Agave
     verifies that for all of the vote accounts in the stakes struct, the data
     in the cache is the same as the data in the accounts db.

     The other problem is that the Firedancer stakes cache does not evict old
     entries and doesn't update delegations within the cache. The cache will
     just insert new pubkeys as stake accounts are created/delegated to. To
     make the cache conformant for the snapshot, old accounts should be removed
     from the snapshot and all of the delegations should be updated. */

  /* First populate the vote accounts using the vote accounts/stakes cache.
     We can populate over all of the fields except we can't reserialize the
     vote account data. Instead we will copy over the raw contents of all of
     the vote accounts. */

  ulong   vote_accounts_len = fd_vote_accounts_pair_t_map_size( old_stakes->vote_accounts.vote_accounts_pool, old_stakes->vote_accounts.vote_accounts_root );
  /* Fresh pool for the serializable copy, sized to the existing map. */
  uchar * pool_mem          = fd_spad_alloc( snapshot_ctx->spad, fd_vote_accounts_pair_t_map_align(), fd_vote_accounts_pair_t_map_footprint( vote_accounts_len ) );
  new_stakes->vote_accounts.vote_accounts_pool = fd_vote_accounts_pair_t_map_join( fd_vote_accounts_pair_t_map_new( pool_mem, vote_accounts_len ) );
  new_stakes->vote_accounts.vote_accounts_root = NULL;

  /* In-order walk of the old vote account map; no removal here, so a
     plain minimum/successor traversal is safe. */
  for( fd_vote_accounts_pair_t_mapnode_t * n = fd_vote_accounts_pair_t_map_minimum(
      old_stakes->vote_accounts.vote_accounts_pool,
      old_stakes->vote_accounts.vote_accounts_root );
      n;
      n = fd_vote_accounts_pair_t_map_successor( old_stakes->vote_accounts.vote_accounts_pool, n ) ) {

    fd_vote_accounts_pair_t_mapnode_t * new_node = fd_vote_accounts_pair_t_map_acquire( new_stakes->vote_accounts.vote_accounts_pool );
    new_node->elem.key   = n->elem.key;
    new_node->elem.stake = n->elem.stake;
    /* Now to populate the value, lookup the account using the acc mgr */
    FD_TXN_ACCOUNT_DECL( vote_acc );
    int err = fd_txn_account_init_from_funk_readonly( vote_acc, &n->elem.key, snapshot_ctx->funk, NULL );
    if( FD_UNLIKELY( err ) ) {
      /* A cached vote account must exist in funk; anything else is fatal. */
      FD_LOG_ERR(( "Failed to view vote account from stakes cache %s", FD_BASE58_ENC_32_ALLOCA(&n->elem.key) ));
    }

    /* Copy the raw account bytes (not the decoded vote state) so the
       serialized cache matches the accounts db byte-for-byte. */
    new_node->elem.value.lamports   = vote_acc->vt->get_lamports( vote_acc );
    new_node->elem.value.data_len   = vote_acc->vt->get_data_len( vote_acc );
    new_node->elem.value.data       = fd_spad_alloc( snapshot_ctx->spad, 8UL, vote_acc->vt->get_data_len( vote_acc ) );
    fd_memcpy( new_node->elem.value.data, vote_acc->vt->get_data( vote_acc ), vote_acc->vt->get_data_len( vote_acc ) );
    new_node->elem.value.owner      = *vote_acc->vt->get_owner( vote_acc );
    new_node->elem.value.executable = (uchar)vote_acc->vt->is_executable( vote_acc );
    new_node->elem.value.rent_epoch = vote_acc->vt->get_rent_epoch( vote_acc );
    fd_vote_accounts_pair_t_map_insert( new_stakes->vote_accounts.vote_accounts_pool, &new_stakes->vote_accounts.vote_accounts_root, new_node );

  }

  /* Stale stake delegations should also be removed or updated in the cache.
     TODO: This will likely be changed in the near future as the stake
     program is migrated to a bpf program. It will likely be replaced by an
     index of stake/vote accounts. */

  FD_TXN_ACCOUNT_DECL( stake_acc );
  fd_delegation_pair_t_mapnode_t * nn = NULL;
  /* Saved-successor traversal: the successor is captured BEFORE the
     current node may be removed from the map below. */
  for( fd_delegation_pair_t_mapnode_t * n = fd_delegation_pair_t_map_minimum(
      old_stakes->stake_delegations_pool, old_stakes->stake_delegations_root ); n; n=nn ) {

    nn = fd_delegation_pair_t_map_successor( old_stakes->stake_delegations_pool, n );

    int err = fd_txn_account_init_from_funk_readonly( stake_acc, &n->elem.account, snapshot_ctx->funk, NULL );
    if( FD_UNLIKELY( err ) ) {
      /* If the stake account doesn't exist, the cache is stale and the entry
         just needs to be evicted. */
      fd_delegation_pair_t_map_remove( old_stakes->stake_delegations_pool, &old_stakes->stake_delegations_root, n );
      fd_delegation_pair_t_map_release( old_stakes->stake_delegations_pool, n );
    } else {
      /* Otherwise, just update the delegation in case it is stale. */
      fd_stake_state_v2_t * stake_state = fd_bincode_decode_spad(
          stake_state_v2, snapshot_ctx->spad,
          stake_acc->vt->get_data( stake_acc ),
          stake_acc->vt->get_data_len( stake_acc ),
          &err );
      if( FD_UNLIKELY( err ) ) {
        FD_LOG_ERR(( "Failed to decode stake state" ));
      }
      /* NOTE(review): assumes the decoded state is the `stake` variant —
         the discriminant is not checked here; confirm upstream invariant. */
      n->elem.delegation = stake_state->inner.stake.stake.delegation;
    }
  }

  /* Copy over the rest of the fields as they are the same. */

  new_stakes->stake_delegations_pool = old_stakes->stake_delegations_pool;
  new_stakes->stake_delegations_root = old_stakes->stake_delegations_root;
  new_stakes->unused                 = old_stakes->unused;
  new_stakes->epoch                  = old_stakes->epoch;
  new_stakes->stake_history          = old_stakes->stake_history;

}
599 :
600 : static inline void
601 : fd_snapshot_create_populate_bank( fd_snapshot_ctx_t * snapshot_ctx,
602 0 : fd_versioned_bank_t * bank ) {
603 :
604 0 : fd_slot_bank_t * slot_bank = &snapshot_ctx->slot_bank;
605 0 : fd_epoch_bank_t * epoch_bank = &snapshot_ctx->epoch_bank;
606 :
607 : /* The blockhash queue has to be copied over along with all of its entries.
608 : As a note, the size is 300 but in fact is of size 301 due to a knwon bug
609 : in the agave client that is emulated by the firedancer client. */
610 :
611 0 : bank->blockhash_queue.last_hash_index = slot_bank->block_hash_queue.last_hash_index;
612 0 : bank->blockhash_queue.last_hash = fd_spad_alloc( snapshot_ctx->spad, FD_HASH_ALIGN, FD_HASH_FOOTPRINT );
613 0 : *bank->blockhash_queue.last_hash = *slot_bank->block_hash_queue.last_hash;
614 :
615 0 : bank->blockhash_queue.ages_len = fd_hash_hash_age_pair_t_map_size( slot_bank->block_hash_queue.ages_pool, slot_bank->block_hash_queue.ages_root);
616 0 : bank->blockhash_queue.ages = fd_spad_alloc( snapshot_ctx->spad, FD_HASH_HASH_AGE_PAIR_ALIGN, bank->blockhash_queue.ages_len * sizeof(fd_hash_hash_age_pair_t) );
617 0 : bank->blockhash_queue.max_age = FD_BLOCKHASH_QUEUE_SIZE;
618 :
619 0 : fd_block_hash_queue_t * queue = &slot_bank->block_hash_queue;
620 0 : fd_hash_hash_age_pair_t_mapnode_t * nn = NULL;
621 0 : ulong blockhash_queue_idx = 0UL;
622 0 : for( fd_hash_hash_age_pair_t_mapnode_t * n = fd_hash_hash_age_pair_t_map_minimum( queue->ages_pool, queue->ages_root ); n; n = nn ) {
623 0 : nn = fd_hash_hash_age_pair_t_map_successor( queue->ages_pool, n );
624 0 : bank->blockhash_queue.ages[ blockhash_queue_idx++ ] = n->elem;
625 0 : }
626 :
627 :
628 :
629 : /* Ancestor can be omitted to boot off of for both clients */
630 :
631 0 : bank->ancestors_len = 0UL;
632 0 : bank->ancestors = NULL;
633 :
634 0 : bank->hash = slot_bank->banks_hash;
635 0 : bank->parent_hash = slot_bank->prev_banks_hash;
636 0 : bank->parent_slot = slot_bank->prev_slot;
637 0 : bank->hard_forks = slot_bank->hard_forks;
638 0 : bank->transaction_count = slot_bank->transaction_count;
639 0 : bank->signature_count = slot_bank->parent_signature_cnt;
640 0 : bank->capitalization = slot_bank->capitalization;
641 0 : bank->tick_height = slot_bank->tick_height;
642 0 : bank->max_tick_height = slot_bank->max_tick_height;
643 :
644 : /* The hashes_per_tick needs to be copied over from the epoch bank because
645 : the pointer could go out of bounds during an epoch boundary. */
646 0 : bank->hashes_per_tick = fd_spad_alloc( snapshot_ctx->spad, alignof(ulong), sizeof(ulong) );
647 0 : *bank->hashes_per_tick = epoch_bank->hashes_per_tick;
648 :
649 0 : bank->ticks_per_slot = FD_TICKS_PER_SLOT;
650 0 : bank->ns_per_slot = epoch_bank->ns_per_slot;
651 0 : bank->genesis_creation_time = epoch_bank->genesis_creation_time;
652 0 : bank->slots_per_year = epoch_bank->slots_per_year;
653 :
654 : /* This value can be set to 0 because the Agave client recomputes this value
655 : and the firedancer client doesn't use it. */
656 :
657 0 : bank->accounts_data_len = 0UL;
658 :
659 0 : bank->slot = snapshot_ctx->slot;
660 0 : bank->epoch = fd_slot_to_epoch( &epoch_bank->epoch_schedule, bank->slot, NULL );
661 0 : bank->block_height = slot_bank->block_height;
662 :
663 : /* Collector id can be left as null for both clients */
664 :
665 0 : fd_memset( &bank->collector_id, 0, sizeof(fd_pubkey_t) );
666 :
667 0 : bank->collector_fees = slot_bank->collected_execution_fees + slot_bank->collected_priority_fees;
668 0 : bank->fee_calculator.lamports_per_signature = slot_bank->lamports_per_signature;
669 0 : bank->fee_rate_governor = slot_bank->fee_rate_governor;
670 0 : bank->collected_rent = slot_bank->collected_rent;
671 :
672 0 : bank->rent_collector.epoch = bank->epoch;
673 0 : bank->rent_collector.epoch_schedule = epoch_bank->rent_epoch_schedule;
674 0 : bank->rent_collector.slots_per_year = epoch_bank->slots_per_year;
675 0 : bank->rent_collector.rent = epoch_bank->rent;
676 :
677 0 : bank->epoch_schedule = epoch_bank->epoch_schedule;
678 0 : bank->inflation = epoch_bank->inflation;
679 :
680 : /* Unused accounts can be left as NULL for both clients. */
681 :
682 0 : fd_memset( &bank->unused_accounts, 0, sizeof(fd_unused_accounts_t) );
683 :
684 : /* We need to copy over the stakes for two epochs despite the Agave client
685 : providing the stakes for 6 epochs. These stakes need to be copied over
686 : because of the fact that the leader schedule computation uses the two
687 : previous epoch stakes.
688 :
689 : TODO: This field has been deprecated by agave and has instead been
690 : replaced with the versioned epoch stakes field in the manifest. The
691 : firedancer client will populate the deprecated field. */
692 :
693 0 : fd_epoch_epoch_stakes_pair_t * relevant_epoch_stakes = fd_spad_alloc( snapshot_ctx->spad, FD_EPOCH_EPOCH_STAKES_PAIR_ALIGN, 2UL * sizeof(fd_epoch_epoch_stakes_pair_t) );
694 0 : fd_memset( &relevant_epoch_stakes[0], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
695 0 : fd_memset( &relevant_epoch_stakes[1], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
696 0 : relevant_epoch_stakes[0].key = bank->epoch;
697 0 : relevant_epoch_stakes[0].value.stakes.vote_accounts = slot_bank->epoch_stakes;
698 0 : relevant_epoch_stakes[1].key = bank->epoch+1UL;
699 0 : relevant_epoch_stakes[1].value.stakes.vote_accounts = epoch_bank->next_epoch_stakes;
700 :
701 0 : bank->epoch_stakes_len = 2UL;
702 0 : bank->epoch_stakes = relevant_epoch_stakes;
703 0 : bank->is_delta = snapshot_ctx->is_incremental;
704 :
705 : /* The firedancer runtime currently maintains a version of the stakes which
706 : can't be reserialized into a format that is compatible with the Solana
707 : snapshot format. Therefore, we must recompute the data structure using
708 : the pubkeys from the stakes cache that is currently in the epoch context. */
709 :
710 0 : fd_snapshot_create_serialiable_stakes( snapshot_ctx, &epoch_bank->stakes, &bank->stakes );
711 :
712 0 : }
713 :
static inline void
fd_snapshot_create_setup_and_validate_ctx( fd_snapshot_ctx_t * snapshot_ctx ) {

  /* Load the epoch bank and slot bank out of funk into the snapshot
     context, validate the user-supplied snapshot parameters, and reset
     the two scratch files (tmp tar archive and final snapshot file) so
     snapshot creation starts from a clean slate.  All failures are
     fatal via FD_LOG_ERR. */

  fd_funk_t * funk = snapshot_ctx->funk;

  /* First the epoch bank. */

  fd_funk_rec_key_t epoch_id = fd_runtime_epoch_bank_key();
  fd_funk_rec_query_t query[1];
  fd_funk_rec_t const * epoch_rec = fd_funk_rec_query_try( funk, NULL, &epoch_id, query );
  if( FD_UNLIKELY( !epoch_rec ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: missing record" ));
  }
  void * epoch_val = fd_funk_val( epoch_rec, fd_funk_wksp( funk ) );

  /* The record must at least hold the 4-byte encoding magic. */
  if( FD_UNLIKELY( fd_funk_val_sz( epoch_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: empty record" ));
  }

  /* The record payload is prefixed with an encoding tag; only the
     bincode encoding is supported here. */
  uint epoch_magic = *(uint*)epoch_val;
  if( FD_UNLIKELY( epoch_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Epoch bank record has wrong magic" ));
  }

  /* Decode the bincode payload (which follows the magic) into
     spad-backed memory, then copy the decoded struct into the context.
     NOTE(review): the decoded struct is copied by value, but any
     pointer fields inside it still reference spad memory — the spad
     must outlive the snapshot creation pass. */
  int err;
  fd_epoch_bank_t * epoch_bank = fd_bincode_decode_spad(
      epoch_bank, snapshot_ctx->spad,
      (uchar *)epoch_val + sizeof(uint),
      fd_funk_val_sz( epoch_rec ) - sizeof(uint),
      &err );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode epoch bank" ));
  }

  snapshot_ctx->epoch_bank = *epoch_bank;

  /* Speculative-read check: abort if the record was concurrently
     modified while we were decoding it. */
  FD_TEST( !fd_funk_rec_query_test( query ) );

  /* Now the slot bank.  Same pattern as the epoch bank above; query is
     reused, which is fine since the epoch bank query was already
     tested. */

  fd_funk_rec_key_t slot_id = fd_runtime_slot_bank_key();
  fd_funk_rec_t const * slot_rec = fd_funk_rec_query_try( funk, NULL, &slot_id, query );
  if( FD_UNLIKELY( !slot_rec ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: missing record" ));
  }
  void * slot_val = fd_funk_val( slot_rec, fd_funk_wksp( funk ) );

  if( FD_UNLIKELY( fd_funk_val_sz( slot_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: empty record" ));
  }

  uint slot_magic = *(uint*)slot_val;
  if( FD_UNLIKELY( slot_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Slot bank record has wrong magic" ));
  }

  fd_slot_bank_t * slot_bank = fd_bincode_decode_spad(
      slot_bank, snapshot_ctx->spad,
      (uchar *)slot_val + sizeof(uint),
      fd_funk_val_sz( slot_rec ) - sizeof(uint),
      &err );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode slot bank" ));
  }

  snapshot_ctx->slot_bank = *slot_bank;

  FD_TEST( !fd_funk_rec_query_test( query ) );

  /* Validate that the snapshot context is setup correctly */

  if( FD_UNLIKELY( !snapshot_ctx->out_dir ) ) {
    FD_LOG_ERR(( "Snapshot directory is not set" ));
  }

  /* A snapshot cannot be produced for a slot ahead of the bank. */
  if( FD_UNLIKELY( snapshot_ctx->slot>snapshot_ctx->slot_bank.slot ) ) {
    FD_LOG_ERR(( "Snapshot slot=%lu is greater than the current slot=%lu",
                 snapshot_ctx->slot, snapshot_ctx->slot_bank.slot ));
  }

  /* Truncate the two files used for snapshot creation and seek to its start. */

  /* lseek to offset 0 returns 0 on success, so any nonzero return
     (including -1 on error) is treated as failure. */
  long seek = lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->tmp_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the temporary file" ));
  }

  seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->snapshot_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the snapshot file" ));
  }

}
815 :
816 : static inline void
817 0 : fd_snapshot_create_setup_writer( fd_snapshot_ctx_t * snapshot_ctx ) {
818 :
819 : /* Setup a tar writer. */
820 :
821 0 : uchar * writer_mem = fd_spad_alloc( snapshot_ctx->spad, fd_tar_writer_align(), fd_tar_writer_footprint() );
822 0 : snapshot_ctx->writer = fd_tar_writer_new( writer_mem, snapshot_ctx->tmp_fd );
823 0 : if( FD_UNLIKELY( !snapshot_ctx->writer ) ) {
824 0 : FD_LOG_ERR(( "Unable to create a tar writer" ));
825 0 : }
826 0 : }
827 :
828 : static inline void
829 0 : fd_snapshot_create_write_version( fd_snapshot_ctx_t * snapshot_ctx ) {
830 :
831 : /* The first file in the tar archive should be the version file.. */
832 :
833 0 : int err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_VERSION_FILE );
834 0 : if( FD_UNLIKELY( err ) ) {
835 0 : FD_LOG_ERR(( "Failed to create the version file" ));
836 0 : }
837 :
838 0 : err = fd_tar_writer_write_file_data( snapshot_ctx->writer, FD_SNAPSHOT_VERSION, FD_SNAPSHOT_VERSION_LEN);
839 0 : if( FD_UNLIKELY( err ) ) {
840 0 : FD_LOG_ERR(( "Failed to create the version file" ));
841 0 : }
842 :
843 0 : err = fd_tar_writer_fini_file( snapshot_ctx->writer );
844 0 : if( FD_UNLIKELY( err ) ) {
845 0 : FD_LOG_ERR(( "Failed to create the version file" ));
846 0 : }
847 :
848 0 : }
849 :
850 : static inline void
851 0 : fd_snapshot_create_write_status_cache( fd_snapshot_ctx_t * snapshot_ctx ) {
852 :
853 : /* First convert the existing status cache into a snapshot-friendly format. */
854 :
855 0 : fd_bank_slot_deltas_t slot_deltas_new = {0};
856 0 : int err = fd_txncache_get_entries( snapshot_ctx->status_cache,
857 0 : &slot_deltas_new,
858 0 : snapshot_ctx->spad );
859 0 : if( FD_UNLIKELY( err ) ) {
860 0 : FD_LOG_ERR(( "Failed to get entries from the status cache" ));
861 0 : }
862 0 : ulong bank_slot_deltas_sz = fd_bank_slot_deltas_size( &slot_deltas_new );
863 0 : uchar * out_status_cache = fd_spad_alloc( snapshot_ctx->spad,
864 0 : FD_BANK_SLOT_DELTAS_ALIGN,
865 0 : bank_slot_deltas_sz );
866 0 : fd_bincode_encode_ctx_t encode_status_cache = {
867 0 : .data = out_status_cache,
868 0 : .dataend = out_status_cache + bank_slot_deltas_sz,
869 0 : };
870 0 : if( FD_UNLIKELY( fd_bank_slot_deltas_encode( &slot_deltas_new, &encode_status_cache ) ) ) {
871 0 : FD_LOG_ERR(( "Failed to encode the status cache" ));
872 0 : }
873 :
874 : /* Now write out the encoded buffer to the tar archive. */
875 :
876 0 : err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_STATUS_CACHE_FILE );
877 0 : if( FD_UNLIKELY( err ) ) {
878 0 : FD_LOG_ERR(( "Failed to create the status cache file" ));
879 0 : }
880 0 : err = fd_tar_writer_write_file_data( snapshot_ctx->writer, out_status_cache, bank_slot_deltas_sz );
881 0 : if( FD_UNLIKELY( err ) ) {
882 0 : FD_LOG_ERR(( "Failed to create the status cache file" ));
883 0 : }
884 0 : err = fd_tar_writer_fini_file( snapshot_ctx->writer );
885 0 : if( FD_UNLIKELY( err ) ) {
886 0 : FD_LOG_ERR(( "Failed to create the status cache file" ));
887 0 : }
888 :
889 : /* Registers all roots and unconstipates the status cache. */
890 :
891 0 : fd_txncache_flush_constipated_slots( snapshot_ctx->status_cache );
892 :
893 0 : }
894 :
static inline void
fd_snapshot_create_write_manifest_and_acc_vecs( fd_snapshot_ctx_t * snapshot_ctx,
                                                fd_hash_t *         out_hash,
                                                ulong *             out_capitalization ) {

  /* Build the Solana snapshot manifest (bank fields, append vec index),
     write out the account files, then encode the manifest into the tar
     slot that was reserved for it earlier.  For a full snapshot, the
     resulting accounts hash and capitalization are returned through
     out_hash/out_capitalization so a later incremental snapshot can
     reference them. */

  fd_solana_manifest_t manifest = {0};

  /* Copy in all the fields of the bank. */

  fd_snapshot_create_populate_bank( snapshot_ctx, &manifest.bank );

  /* Populate the rest of the manifest, except for the append vec index. */

  manifest.lamports_per_signature = snapshot_ctx->slot_bank.lamports_per_signature;
  manifest.epoch_account_hash     = &snapshot_ctx->slot_bank.epoch_account_hash;

  /* FIXME: The versioned epoch stakes needs to be implemented. Right now if
     we try to create a snapshot on or near an epoch boundary, we will produce
     an invalid snapshot. */

  manifest.versioned_epoch_stakes_len = 0UL;
  manifest.versioned_epoch_stakes     = NULL;

  /* Populate the append vec index and write out the corresponding acc files. */

  ulong incr_capitalization = 0UL;
  fd_snapshot_create_populate_acc_vecs( snapshot_ctx, &manifest, snapshot_ctx->writer, &incr_capitalization );

  /* Once the append vec index is populated and the hashes are calculated,
     propagate the hashes to the correct fields. As a note, the last_snap_hash
     is the full snapshot's account hash.

     NOTE(review): manifest was zero-initialized above, so the
     bank_incremental_snapshot_persistence pointer dereferenced below
     must have been allocated inside
     fd_snapshot_create_populate_acc_vecs — confirm; otherwise the
     incremental path is a NULL dereference. */

  if( snapshot_ctx->is_incremental ) {
    manifest.bank_incremental_snapshot_persistence->full_slot                 = snapshot_ctx->last_snap_slot;
    manifest.bank_incremental_snapshot_persistence->full_hash                 = *snapshot_ctx->last_snap_acc_hash;
    manifest.bank_incremental_snapshot_persistence->full_capitalization       = snapshot_ctx->last_snap_capitalization;
    manifest.bank_incremental_snapshot_persistence->incremental_hash          = snapshot_ctx->acc_hash;
    manifest.bank_incremental_snapshot_persistence->incremental_capitalization = incr_capitalization;
  } else {
    *out_hash           = manifest.accounts_db.bank_hash_info.accounts_hash;
    *out_capitalization = snapshot_ctx->slot_bank.capitalization;
  }

  /* At this point, all of the account files are written out and the append
     vec index is populated in the manifest. We have already reserved space
     in the archive for the manifest. All we need to do now is encode the
     manifest and write it in. */

  ulong   manifest_sz  = fd_solana_manifest_size( &manifest );
  uchar * out_manifest = fd_spad_alloc( snapshot_ctx->spad, fd_solana_manifest_align(), manifest_sz );

  fd_bincode_encode_ctx_t encode = {
    .data    = out_manifest,
    .dataend = out_manifest + manifest_sz
  };

  int err = fd_solana_manifest_encode( &manifest, &encode );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Failed to encode the manifest" ));
  }

  /* Back-fill the reserved space at the front of the archive. */
  err = fd_tar_writer_fill_space( snapshot_ctx->writer, out_manifest, manifest_sz );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Failed to write out the manifest" ));
  }

  void * mem = fd_tar_writer_delete( snapshot_ctx->writer );
  if( FD_UNLIKELY( !mem ) ) {
    FD_LOG_ERR(( "Unable to delete the tar writer" ));
  }

}
968 :
969 : static inline void
970 0 : fd_snapshot_create_compress( fd_snapshot_ctx_t * snapshot_ctx ) {
971 :
972 : /* Compress the file using zstd. First open the non-compressed file and
973 : create a file for the compressed file. The reason why we can't do this
974 : as we stream out the snapshot archive is that we write back into the
975 : manifest buffer.
976 :
977 : TODO: A way to eliminate this and to just stream out
978 : 1 compressed file would be to totally precompute the index such that
979 : we don't have to write back into funk.
980 :
981 : TODO: Currently, the snapshot service interfaces directly with the zstd
982 : library but a generalized cstream defined in fd_zstd should be used
983 : instead. */
984 :
985 0 : ulong in_buf_sz = ZSTD_CStreamInSize();
986 0 : ulong zstd_buf_sz = ZSTD_CStreamOutSize();
987 0 : ulong out_buf_sz = ZSTD_CStreamOutSize();
988 :
989 0 : char * in_buf = fd_spad_alloc( snapshot_ctx->spad, FD_ZSTD_CSTREAM_ALIGN, in_buf_sz );
990 0 : char * zstd_buf = fd_spad_alloc( snapshot_ctx->spad, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
991 0 : char * out_buf = fd_spad_alloc( snapshot_ctx->spad, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
992 :
993 : /* Reopen the tarball and open/overwrite the filename for the compressed,
994 : finalized full snapshot. Setup the zstd compression stream. */
995 :
996 0 : int err = 0;
997 :
998 0 : ZSTD_CStream * cstream = ZSTD_createCStream();
999 0 : if( FD_UNLIKELY( !cstream ) ) {
1000 0 : FD_LOG_ERR(( "Failed to create the zstd compression stream" ));
1001 0 : }
1002 0 : ZSTD_initCStream( cstream, ZSTD_CLEVEL_DEFAULT );
1003 :
1004 0 : fd_io_buffered_ostream_t ostream[1];
1005 :
1006 0 : if( FD_UNLIKELY( !fd_io_buffered_ostream_init( ostream, snapshot_ctx->snapshot_fd, out_buf, out_buf_sz ) ) ) {
1007 0 : FD_LOG_ERR(( "Failed to initialize the ostream" ));
1008 0 : }
1009 :
1010 0 : long seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
1011 0 : if( FD_UNLIKELY( seek!=0L ) ) {
1012 0 : FD_LOG_ERR(( "Failed to seek to the start of the file" ));
1013 0 : }
1014 :
1015 : /* At this point, the tar archive and the new zstd file is open. The zstd
1016 : streamer is still open. Now, we are ready to read in bytes and stream
1017 : compress them. We will keep going until we see an EOF in a tar archive. */
1018 :
1019 0 : ulong in_sz = in_buf_sz;
1020 :
1021 0 : ulong off = (ulong)lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
1022 0 : if( FD_UNLIKELY( off ) ) {
1023 0 : FD_LOG_ERR(( "Failed to seek to the beginning of the file" ));
1024 0 : }
1025 :
1026 0 : while( in_sz==in_buf_sz ) {
1027 :
1028 : /* Read chunks from the file. There isn't really a need to use a streamed
1029 : reader here because we will read in the max size buffer for every single
1030 : file read except for the very last one.
1031 :
1032 : in_sz will only not equal in_buf_sz on the last read. */
1033 0 : err = fd_io_read( snapshot_ctx->tmp_fd, in_buf, 0UL, in_buf_sz, &in_sz );
1034 0 : if( FD_UNLIKELY( err ) ) {
1035 0 : FD_LOG_ERR(( "Failed to read in the file" ));
1036 0 : }
1037 :
1038 : /* Compress the in memory buffer and add it to the output stream. */
1039 :
1040 0 : ZSTD_inBuffer input = { in_buf, in_sz, 0UL };
1041 0 : while( input.pos<input.size ) {
1042 0 : ZSTD_outBuffer output = { zstd_buf, zstd_buf_sz, 0UL };
1043 0 : ulong ret = ZSTD_compressStream( cstream, &output, &input );
1044 :
1045 0 : if( FD_UNLIKELY( ZSTD_isError( ret ) ) ) {
1046 0 : FD_LOG_ERR(( "Compression error: %s\n", ZSTD_getErrorName( ret ) ));
1047 0 : }
1048 :
1049 0 : err = fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
1050 0 : if( FD_UNLIKELY( err ) ) {
1051 0 : FD_LOG_ERR(( "Failed to write out the compressed file" ));
1052 0 : }
1053 0 : }
1054 0 : }
1055 :
1056 : /* Now flush any bytes left in the zstd buffer, cleanup open file
1057 : descriptors, and deinit any data structures. */
1058 :
1059 0 : ZSTD_outBuffer output = { zstd_buf, zstd_buf_sz, 0UL };
1060 0 : ulong remaining = ZSTD_endStream( cstream, &output );
1061 :
1062 0 : if( FD_UNLIKELY( ZSTD_isError( remaining ) ) ) {
1063 0 : FD_LOG_ERR(( "Unable to end the zstd stream" ));
1064 0 : }
1065 0 : if( output.pos>0UL ) {
1066 0 : fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
1067 0 : }
1068 :
1069 0 : ZSTD_freeCStream( cstream ); /* Works even if cstream is null */
1070 0 : err = fd_io_buffered_ostream_flush( ostream );
1071 0 : if( FD_UNLIKELY( err ) ) {
1072 0 : FD_LOG_ERR(( "Failed to flush the ostream" ));
1073 0 : }
1074 :
1075 : /* Assuming that there was a successful write, make the compressed
1076 : snapshot file readable and servable. */
1077 :
1078 0 : char tmp_directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
1079 0 : err = snprintf( tmp_directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/%s", snapshot_ctx->out_dir, snapshot_ctx->is_incremental ? FD_SNAPSHOT_TMP_INCR_ARCHIVE_ZSTD : FD_SNAPSHOT_TMP_FULL_ARCHIVE_ZSTD );
1080 0 : if( FD_UNLIKELY( err<0 ) ) {
1081 0 : FD_LOG_ERR(( "Failed to format directory string" ));
1082 0 : }
1083 :
1084 0 : char directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
1085 0 : if( !snapshot_ctx->is_incremental ) {
1086 0 : err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/snapshot-%lu-%s.tar.zst",
1087 0 : snapshot_ctx->out_dir, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
1088 0 : } else {
1089 0 : err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/incremental-snapshot-%lu-%lu-%s.tar.zst",
1090 0 : snapshot_ctx->out_dir, snapshot_ctx->last_snap_slot, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
1091 0 : }
1092 :
1093 0 : if( FD_UNLIKELY( err<0 ) ) {
1094 0 : FD_LOG_ERR(( "Failed to format directory string" ));
1095 0 : }
1096 :
1097 0 : err = rename( tmp_directory_buf_zstd, directory_buf_zstd );
1098 0 : if( FD_UNLIKELY( err<0 ) ) {
1099 0 : FD_LOG_ERR(( "Failed to rename file from %s to %s (%i-%s)", tmp_directory_buf_zstd, directory_buf_zstd, errno, fd_io_strerror( errno ) ));
1100 0 : }
1101 :
1102 0 : }
1103 :
/* fd_snapshot_create_new_snapshot is the public entry point that drives
   the full snapshot-creation pipeline for snapshot_ctx->slot.  The
   stages below are strictly ordered: context setup/validation, tar
   writer construction, version file, status cache, manifest + account
   vecs, and finally zstd compression into out_dir.  Each stage aborts
   via FD_LOG_ERR on failure, so reaching the final notice implies the
   snapshot was produced.  For full snapshots, out_hash and
   out_capitalization receive the accounts hash and capitalization so a
   subsequent incremental snapshot can reference them. */
void
fd_snapshot_create_new_snapshot( fd_snapshot_ctx_t * snapshot_ctx,
                                 fd_hash_t *         out_hash,
                                 ulong *             out_capitalization ) {

  FD_LOG_NOTICE(( "Starting to produce a snapshot for slot=%lu in directory=%s", snapshot_ctx->slot, snapshot_ctx->out_dir ));

  /* Validate that the snapshot_ctx is setup correctly. */

  fd_snapshot_create_setup_and_validate_ctx( snapshot_ctx );

  /* Setup the tar archive writer. */

  fd_snapshot_create_setup_writer( snapshot_ctx );

  /* Write out the version file. */

  fd_snapshot_create_write_version( snapshot_ctx );

  /* Dump the status cache and append it to the tar archive. */

  fd_snapshot_create_write_status_cache( snapshot_ctx );

  /* Populate and write out the manifest and append vecs. */

  fd_snapshot_create_write_manifest_and_acc_vecs( snapshot_ctx, out_hash, out_capitalization );

  /* Compress the tar file and write it out to the specified directory. */

  fd_snapshot_create_compress( snapshot_ctx );

  FD_LOG_NOTICE(( "Finished producing a snapshot" ));

}
|