Line data Source code
1 : #include "fd_snapshot_create.h"
2 : #include "../runtime/sysvar/fd_sysvar_epoch_schedule.h"
3 : #include "../../ballet/zstd/fd_zstd.h"
4 : #include "../runtime/fd_hashes.h"
5 : #include "../runtime/fd_runtime.h"
6 :
7 : #include <errno.h>
8 : #include <stdio.h>
9 : #include <stdlib.h>
10 : #include <sys/stat.h>
11 : #include <sys/types.h>
12 : #include <unistd.h>
13 : #include <zstd.h>
14 :
15 : static uchar padding[ FD_SNAPSHOT_ACC_ALIGN ] = {0};
16 : static fd_account_meta_t default_meta = { .magic = FD_ACCOUNT_META_MAGIC };
17 :
18 : static inline fd_account_meta_t *
19 0 : fd_snapshot_create_get_default_meta( ulong slot ) {
20 0 : default_meta.slot = slot;
21 0 : return &default_meta;
22 0 : }
23 :
24 : static inline void
25 : fd_snapshot_create_populate_acc_vecs( fd_snapshot_ctx_t * snapshot_ctx,
26 : fd_solana_manifest_serializable_t * manifest,
27 : fd_tar_writer_t * writer,
28 0 : ulong * out_cap ) {
29 :
30 : /* The append vecs need to be described in an index in the manifest so a
31 : reader knows what account files to look for. These files are technically
32 : slot indexed, but the Firedancer implementation of the Solana snapshot
33 : produces far fewer indices. These storages are for the accounts
34 : that were modified and deleted in the most recent slot because that
35 : information is used by the Agave client to calculate and verify the
36 : bank hash for the given slot. This is done as an optimization to avoid
37 : having to slot index the Firedancer accounts db which would incur a large
38 : performance hit.
39 :
40 : To avoid iterating through the root twice to determine what accounts were
41 : touched in the snapshot slot and what accounts were touched in the
42 : other slots, we will create an array of pubkey pointers for all accounts
43 : that were touched in the snapshot slot. This buffer can be safely sized to
44 : the maximum amount of writable accounts that are possible in a non-epoch
45 : boundary slot. The rationale for this bound is explained in fd_runtime.h.
46 : We will not attempt to create a snapshot on an epoch boundary.
47 :
48 : TODO: We must add compaction here. */
49 :
50 0 : fd_pubkey_t * * snapshot_slot_keys = fd_valloc_malloc( snapshot_ctx->valloc, alignof(fd_pubkey_t*), sizeof(fd_pubkey_t*) * FD_WRITABLE_ACCS_IN_SLOT );
51 0 : ulong snapshot_slot_key_cnt = 0UL;
52 :
53 : /* We will dynamically resize the number of incremental keys because the upper
54 : bound will be roughly 8 bytes * writable accs in a slot * number of slots
55 : since the last full snapshot which can quickly grow to be severalgigabytes
56 : or more. In the normal case, this won't require dynamic resizing. */
57 0 : #define FD_INCREMENTAL_KEY_INIT_BOUND (100000UL)
58 0 : ulong incremental_key_bound = FD_INCREMENTAL_KEY_INIT_BOUND;
59 0 : ulong incremental_key_cnt = 0UL;
60 0 : fd_funk_rec_key_t const * * incremental_keys = snapshot_ctx->is_incremental ?
61 0 : fd_valloc_malloc( snapshot_ctx->valloc, alignof(fd_funk_rec_key_t*), sizeof(fd_funk_rec_key_t*) * incremental_key_bound ) :
62 0 : NULL;
63 :
64 0 : #undef FD_INCREMENTAL_KEY_INIT_BOUND
65 :
66 : /* In order to size out the accounts DB index in the manifest, we must
67 : iterate through funk and accumulate the size of all of the records
68 : from all slots before the snapshot_slot. */
69 :
70 0 : fd_funk_t * funk = snapshot_ctx->acc_mgr->funk;
71 0 : ulong prev_sz = 0UL;
72 0 : ulong tombstones_cnt = 0UL;
73 0 : for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {
74 :
75 0 : if( !fd_funk_key_is_acc( rec->pair.key ) ) {
76 0 : continue;
77 0 : }
78 :
79 0 : tombstones_cnt++;
80 :
81 0 : int is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
82 0 : uchar const * raw = fd_funk_val( rec, fd_funk_wksp( funk ) );
83 0 : fd_account_meta_t * metadata = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
84 0 : (fd_account_meta_t*)raw;
85 :
86 0 : if( !metadata ) {
87 0 : continue;
88 0 : }
89 :
90 0 : if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
91 0 : continue;
92 0 : }
93 :
94 0 : if( snapshot_ctx->is_incremental ) {
95 : /* We only care about accounts that were modified since the last
96 : snapshot slot for incremental snapshots.
97 :
98 : We also need to keep track of the capitalization for all of the
99 : accounts that are in the incremental as this is verified. */
100 0 : if( metadata->slot<=snapshot_ctx->last_snap_slot ) {
101 0 : continue;
102 0 : }
103 0 : incremental_keys[ incremental_key_cnt++ ] = rec->pair.key;
104 0 : *out_cap += metadata->info.lamports;
105 :
106 0 : if( FD_UNLIKELY( incremental_key_cnt==incremental_key_bound ) ) {
107 : /* Dynamically resize if needed. */
108 0 : incremental_key_bound *= 2UL;
109 0 : fd_funk_rec_key_t const * * new_incremental_keys = fd_valloc_malloc( snapshot_ctx->valloc,
110 0 : alignof(fd_funk_rec_key_t*),
111 0 : sizeof(fd_funk_rec_key_t*) * incremental_key_bound );
112 0 : fd_memcpy( new_incremental_keys, incremental_keys, sizeof(fd_funk_rec_key_t*) * incremental_key_cnt );
113 0 : fd_valloc_free( snapshot_ctx->valloc, incremental_keys );
114 0 : incremental_keys = new_incremental_keys;
115 0 : }
116 0 : }
117 :
118 : /* We know that all of the accounts from the snapshot slot can fit into
119 : one append vec, so we ignore all accounts from the snapshot slot. */
120 :
121 0 : if( metadata->slot==snapshot_ctx->slot ) {
122 0 : continue;
123 0 : }
124 :
125 0 : prev_sz += metadata->dlen + sizeof(fd_solana_account_hdr_t);
126 :
127 0 : }
128 :
129 : /* At this point we have sized out all of the relevant accounts that will
130 : be included in the snapshot. Now we must populate each of the append vecs
131 : and update the index as we go.
132 :
133 : When we account for the number of slots we need to consider one append vec
134 : for the snapshot slot and try to maximally fill up the others: an append
135 : vec has a protocol-defined maximum size in Agave. */
136 :
137 0 : ulong num_slots = 1UL + prev_sz / FD_SNAPSHOT_APPEND_VEC_SZ_MAX +
138 0 : (prev_sz % FD_SNAPSHOT_APPEND_VEC_SZ_MAX ? 1UL : 0UL);
139 :
140 0 : fd_solana_accounts_db_fields_t * accounts_db = &manifest->accounts_db;
141 :
142 0 : accounts_db->storages_len = num_slots;
143 0 : accounts_db->storages = fd_valloc_malloc( snapshot_ctx->valloc,
144 0 : FD_SNAPSHOT_SLOT_ACC_VECS_ALIGN,
145 0 : sizeof(fd_snapshot_slot_acc_vecs_t) * accounts_db->storages_len );
146 0 : accounts_db->version = 1UL;
147 0 : accounts_db->slot = snapshot_ctx->slot;
148 0 : accounts_db->historical_roots_len = 0UL;
149 0 : accounts_db->historical_roots = NULL;
150 0 : accounts_db->historical_roots_with_hash_len = 0UL;
151 0 : accounts_db->historical_roots_with_hash = NULL;
152 :
153 0 : for( ulong i=0UL; i<num_slots; i++ ) {
154 : /* Populate the storages for each slot. As a note, the slot number only
155 : matters for the snapshot slot. The other slot numbers don't affect
156 : consensus at all. Agave also maintains an invariant that there can
157 : only be one account vec per storage. */
158 :
159 0 : accounts_db->storages[ i ].account_vecs_len = 1UL;
160 0 : accounts_db->storages[ i ].account_vecs = fd_valloc_malloc( snapshot_ctx->valloc,
161 0 : FD_SNAPSHOT_ACC_VEC_ALIGN,
162 0 : sizeof(fd_snapshot_acc_vec_t) * accounts_db->storages[ i ].account_vecs_len );
163 0 : accounts_db->storages[ i ].account_vecs[ 0 ].file_sz = 0UL;
164 0 : accounts_db->storages[ i ].account_vecs[ 0 ].id = i + 1UL;
165 0 : accounts_db->storages[ i ].slot = snapshot_ctx->slot - i;
166 0 : }
167 :
168 : /* At this point we have iterated through all of the accounts and created
169 : the index. We are now ready to generate a snapshot hash. For both
170 : snapshots we need to generate two hashes:
171 : 1. The accounts hash. This is a simple hash of all of the accounts
172 : included in the snapshot.
173 : 2. The snapshot hash. This is a hash of the accounts hash and the epoch
174 : account hash. If the EAH is not included, then the accounts hash ==
175 : snapshot hash.
176 :
177 : There is some nuance as to which hash goes where. For full snapshots,
178 : the accounts hash in the bank hash info is the accounts hash. The hash in
179 : the filename is the snapshot hash.
180 :
181 : For incremental snapshots, the account hash in the bank hash info field is
182 : left zeroed out. The full snapshot's hash is in the incremental persistence
183 : field. The incremental snapshot's accounts hash is included in the
184 : incremental persistence field. The hash in the filename is the snapshot
185 : hash. */
186 :
187 0 : int err;
188 0 : if( !snapshot_ctx->is_incremental ) {
189 0 : err = fd_snapshot_service_hash( &snapshot_ctx->acc_hash,
190 0 : &snapshot_ctx->snap_hash,
191 0 : &snapshot_ctx->slot_bank,
192 0 : &snapshot_ctx->epoch_bank,
193 0 : snapshot_ctx->acc_mgr->funk,
194 0 : snapshot_ctx->tpool,
195 0 : snapshot_ctx->valloc );
196 0 : accounts_db->bank_hash_info.accounts_hash = snapshot_ctx->acc_hash;
197 0 : } else {
198 0 : err = fd_snapshot_service_inc_hash( &snapshot_ctx->acc_hash,
199 0 : &snapshot_ctx->snap_hash,
200 0 : &snapshot_ctx->slot_bank,
201 0 : &snapshot_ctx->epoch_bank,
202 0 : snapshot_ctx->acc_mgr->funk,
203 0 : incremental_keys,
204 0 : incremental_key_cnt,
205 0 : snapshot_ctx->valloc );
206 0 : fd_valloc_free( snapshot_ctx->valloc, incremental_keys );
207 :
208 0 : fd_memset( &accounts_db->bank_hash_info.accounts_hash, 0, sizeof(fd_hash_t) );
209 0 : }
210 :
211 0 : FD_LOG_NOTICE(( "Hashes calculated acc_hash=%s snapshot_hash=%s",
212 0 : FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->acc_hash),
213 0 : FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) ));
214 :
215 0 : if( FD_UNLIKELY( err ) ) {
216 0 : FD_LOG_ERR(( "Unable to calculate snapshot hash" ));
217 0 : }
218 :
219 0 : fd_memset( &accounts_db->bank_hash_info.stats, 0, sizeof(fd_bank_hash_stats_t) );
220 :
221 : /* Now, we have calculated the relevant hashes for the accounts.
222 : Because the files are serially written out for tar and we need to prepend
223 : the manifest, we must reserve space in the archive for the solana manifest. */
224 :
225 0 : if( snapshot_ctx->is_incremental ) {
226 0 : manifest->bank_incremental_snapshot_persistence = fd_valloc_malloc( snapshot_ctx->valloc,
227 0 : FD_BANK_INCREMENTAL_SNAPSHOT_PERSISTENCE_ALIGN,
228 0 : sizeof(fd_bank_incremental_snapshot_persistence_t) );
229 0 : }
230 :
231 0 : ulong manifest_sz = fd_solana_manifest_serializable_size( manifest );
232 :
233 0 : char buffer[ FD_SNAPSHOT_DIR_MAX ];
234 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "snapshots/%lu/%lu", snapshot_ctx->slot, snapshot_ctx->slot );
235 0 : if( FD_UNLIKELY( err<0 ) ) {
236 0 : FD_LOG_ERR(( "Unable to format manifest name string" ));
237 0 : }
238 :
239 0 : err = fd_tar_writer_new_file( writer, buffer );
240 0 : if( FD_UNLIKELY( err ) ) {
241 0 : FD_LOG_ERR(( "Unable to create snapshot manifest file" ));
242 0 : }
243 :
244 : /* TODO: We want to eliminate having to write back into the tar file. This
245 : will enable the snapshot service to only use one file per snapshot.
246 : In order to do this, we must precompute the index in the manifest
247 : completely. This will allow us to stream out a compressed snapshot. */
248 :
249 0 : err = fd_tar_writer_make_space( writer, manifest_sz );
250 0 : if( FD_UNLIKELY( err ) ) {
251 0 : FD_LOG_ERR(( "Unable to make space for snapshot manifest file" ));
252 0 : }
253 :
254 0 : err = fd_tar_writer_fini_file( writer );
255 0 : if( FD_UNLIKELY( err ) ) {
256 0 : FD_LOG_ERR(( "Unable to finalize snapshot manifest file" ));
257 0 : }
258 :
259 : /* We have made space for the manifest and are ready to append the append
260 : vec files directly into the tar archive. We will iterate through all of
261 : the records in the funk root and create/populate an append vec for
262 : previous slots. Just record the pubkeys for the latest slot to populate
263 : the append vec after. If the append vec is full, write into the next one. */
264 :
265 0 : ulong curr_slot = 1UL;
266 0 : fd_snapshot_acc_vec_t * prev_accs = &accounts_db->storages[ curr_slot ].account_vecs[ 0UL ];
267 :
268 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
269 0 : if( FD_UNLIKELY( err<0 ) ) {
270 0 : FD_LOG_ERR(( "Unable to format previous accounts name string" ));
271 0 : }
272 :
273 0 : err = fd_tar_writer_new_file( writer, buffer );
274 0 : if( FD_UNLIKELY( err ) ) {
275 0 : FD_LOG_ERR(( "Unable to create previous accounts file" ));
276 0 : }
277 :
278 0 : fd_funk_rec_t * * tombstones = snapshot_ctx->is_incremental ? NULL :
279 0 : fd_valloc_malloc( snapshot_ctx->valloc, alignof(fd_funk_rec_t*), sizeof(fd_funk_rec_t*) * tombstones_cnt );
280 0 : tombstones_cnt = 0UL;
281 :
282 0 : for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {
283 :
284 : /* Get the account data. */
285 :
286 0 : if( !fd_funk_key_is_acc( rec->pair.key ) ) {
287 0 : continue;
288 0 : }
289 :
290 0 : fd_pubkey_t const * pubkey = fd_type_pun_const( rec->pair.key[0].uc );
291 0 : int is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
292 0 : uchar const * raw = fd_funk_val( rec, fd_funk_wksp( funk ) );
293 0 : fd_account_meta_t * metadata = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
294 0 : (fd_account_meta_t*)raw;
295 :
296 0 : if( !snapshot_ctx->is_incremental && is_tombstone ) {
297 : /* If we are in a full snapshot, we need to gather all of the accounts
298 : that we plan on deleting. */
299 0 : tombstones[ tombstones_cnt++ ] = (fd_funk_rec_t*)rec;
300 0 : }
301 :
302 0 : if( !metadata ) {
303 0 : continue;
304 0 : }
305 :
306 0 : if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
307 0 : continue;
308 0 : }
309 :
310 : /* Don't iterate through accounts that were touched before the last full
311 : snapshot. */
312 0 : if( snapshot_ctx->is_incremental && metadata->slot<=snapshot_ctx->last_snap_slot ) {
313 0 : continue;
314 0 : }
315 :
316 0 : uchar const * acc_data = raw + metadata->hlen;
317 :
318 : /* All accounts that were touched in the snapshot slot should be in
319 : a different append vec so that Agave can calculate the snapshot slot's
320 : bank hash. We don't want to include them in an arbitrary append vec. */
321 :
322 0 : if( metadata->slot==snapshot_ctx->slot ) {
323 0 : snapshot_slot_keys[ snapshot_slot_key_cnt++ ] = (fd_pubkey_t*)pubkey;
324 0 : continue;
325 0 : }
326 :
327 : /* We don't want to iterate over tombstones if the snapshot is not
328 : incremental */
329 0 : if( !snapshot_ctx->is_incremental && is_tombstone ) {
330 0 : continue;
331 0 : }
332 :
333 0 : ulong new_sz = prev_accs->file_sz + sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
334 :
335 0 : if( new_sz>FD_SNAPSHOT_APPEND_VEC_SZ_MAX ) {
336 :
337 : /* When the current append vec is full, finish writing it, start writing
338 : into the next append vec. */
339 :
340 0 : err = fd_tar_writer_fini_file( writer );
341 0 : if( FD_UNLIKELY( err ) ) {
342 0 : FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
343 0 : }
344 :
345 0 : prev_accs = &accounts_db->storages[ ++curr_slot ].account_vecs[ 0UL ];
346 :
347 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
348 0 : if( FD_UNLIKELY( err<0 ) ) {
349 0 : FD_LOG_ERR(( "Unable to format previous accounts name string" ));
350 0 : }
351 :
352 0 : err = fd_tar_writer_new_file( writer, buffer );
353 0 : if( FD_UNLIKELY( err ) ) {
354 0 : FD_LOG_ERR(( "Unable to create previous accounts file" ));
355 0 : }
356 0 : }
357 :
358 0 : prev_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
359 :
360 :
361 : /* Write out the header. */
362 :
363 0 : fd_solana_account_hdr_t header = {0};
364 : /* Stored meta */
365 0 : header.meta.write_version_obsolete = 0UL;
366 0 : header.meta.data_len = metadata->dlen;
367 0 : fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
368 : /* Account Meta */
369 0 : header.info.lamports = metadata->info.lamports;
370 0 : header.info.rent_epoch = header.info.lamports ? metadata->info.rent_epoch : 0UL;
371 0 : fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
372 0 : header.info.executable = metadata->info.executable;
373 : /* Hash */
374 0 : fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );
375 :
376 0 : err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
377 0 : if( FD_UNLIKELY( err ) ) {
378 0 : FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
379 0 : }
380 :
381 : /* Write out the file data. */
382 :
383 0 : err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
384 0 : if( FD_UNLIKELY( err ) ) {
385 0 : FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
386 0 : }
387 :
388 0 : ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
389 0 : err = fd_tar_writer_write_file_data( writer, padding, align_sz );
390 0 : if( FD_UNLIKELY( err ) ) {
391 0 : FD_LOG_ERR( ("Unable to stream out account padding to tar archive" ));
392 0 : }
393 0 : }
394 :
395 0 : err = fd_tar_writer_fini_file( writer );
396 0 : if( FD_UNLIKELY( err ) ) {
397 0 : FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
398 0 : }
399 :
400 : /* Now write out the append vec for the snapshot slot. Again, this is needed
401 : because the snapshot slot's accounts must be in their append vec in order
402 : to verify the bank hash for the snapshot slot in the Agave client. */
403 :
404 0 : fd_snapshot_acc_vec_t * curr_accs = &accounts_db->storages[ 0UL ].account_vecs[ 0UL ];
405 0 : err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot, curr_accs->id );
406 0 : if( FD_UNLIKELY( err<0 ) ) {
407 0 : FD_LOG_ERR(( "Unable to format current accounts name string" ));
408 0 : }
409 :
410 0 : err = fd_tar_writer_new_file( writer, buffer );
411 0 : if( FD_UNLIKELY( err ) ) {
412 0 : FD_LOG_ERR(( "Unable to create current accounts file" ));
413 0 : }
414 :
415 0 : for( ulong i=0UL; i<snapshot_slot_key_cnt; i++ ) {
416 :
417 0 : fd_pubkey_t const * pubkey = snapshot_slot_keys[i];
418 0 : fd_funk_rec_key_t key = fd_acc_funk_key( pubkey );
419 :
420 0 : fd_funk_rec_t const * rec = fd_funk_rec_query( funk, NULL, &key );
421 0 : if( FD_UNLIKELY( !rec ) ) {
422 0 : FD_LOG_ERR(( "Previously found record can no longer be found" ));
423 0 : }
424 :
425 0 : int is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
426 0 : uchar const * raw = fd_funk_val( rec, fd_funk_wksp( funk ) );
427 0 : fd_account_meta_t * metadata = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
428 0 : (fd_account_meta_t*)raw;
429 :
430 0 : if( FD_UNLIKELY( !metadata ) ) {
431 0 : FD_LOG_ERR(( "Record should have non-NULL metadata" ));
432 0 : }
433 :
434 0 : if( FD_UNLIKELY( metadata->magic!=FD_ACCOUNT_META_MAGIC ) ) {
435 0 : FD_LOG_ERR(( "Record should have valid magic" ));
436 0 : }
437 :
438 0 : uchar const * acc_data = raw + metadata->hlen;
439 :
440 0 : curr_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
441 :
442 : /* Write out the header. */
443 0 : fd_solana_account_hdr_t header = {0};
444 : /* Stored meta */
445 0 : header.meta.write_version_obsolete = 0UL;
446 0 : header.meta.data_len = metadata->dlen;
447 0 : fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
448 : /* Account Meta */
449 0 : header.info.lamports = metadata->info.lamports;
450 0 : header.info.rent_epoch = header.info.lamports ? metadata->info.rent_epoch : 0UL;
451 0 : fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
452 0 : header.info.executable = metadata->info.executable;
453 : /* Hash */
454 0 : fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );
455 :
456 :
457 0 : err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
458 0 : if( FD_UNLIKELY( err ) ) {
459 0 : FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
460 0 : }
461 0 : err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
462 0 : if( FD_UNLIKELY( err ) ) {
463 0 : FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
464 0 : }
465 0 : ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
466 0 : err = fd_tar_writer_write_file_data( writer, padding, align_sz );
467 0 : if( FD_UNLIKELY( err ) ) {
468 0 : FD_LOG_ERR(( "Unable to stream out account padding to tar archive" ));
469 0 : }
470 0 : }
471 :
472 0 : err = fd_tar_writer_fini_file( writer );
473 0 : if( FD_UNLIKELY( err ) ) {
474 0 : FD_LOG_ERR(( "Unable to finish writing out file" ));
475 0 : }
476 :
477 : /* TODO: At this point we must implement compaction to the snapshot service.
478 : Without this, we are actually not cleaning up any tombstones from funk. */
479 :
480 0 : if( snapshot_ctx->is_incremental ) {
481 0 : fd_funk_start_write( funk );
482 0 : err = fd_funk_rec_forget( funk, tombstones, tombstones_cnt );
483 0 : if( FD_UNLIKELY( err!=FD_FUNK_SUCCESS ) ) {
484 0 : FD_LOG_ERR(( "Unable to forget tombstones" ));
485 0 : }
486 0 : FD_LOG_NOTICE(( "Compacted %lu tombstone records", tombstones_cnt ));
487 0 : fd_funk_end_write( funk );
488 0 : }
489 :
490 0 : fd_valloc_free( snapshot_ctx->valloc, snapshot_slot_keys );
491 0 : fd_valloc_free( snapshot_ctx->valloc, tombstones );
492 :
493 0 : }
494 :
/* Convert the runtime's stakes cache (old_stakes) into a form that can be
   reserialized into the Agave-compatible snapshot format (new_stakes).
   Vote account data is re-read from the accounts db, stale stake
   delegations are evicted, and live delegations are refreshed in place
   (old_stakes' delegation pool is mutated and then shared with new_stakes).
   Aborts via FD_LOG_ERR on any lookup/decode failure. */
static void
fd_snapshot_create_serialiable_stakes( fd_snapshot_ctx_t *        snapshot_ctx,
                                       fd_stakes_t *              old_stakes,
                                       fd_stakes_serializable_t * new_stakes ) {

  /* The deserialized stakes cache that is used by the runtime can't be
     reserialized into the format that Agave uses. For every vote account
     in the stakes struct, the Firedancer client holds a decoded copy of the
     vote state. However, this vote state can't be reserialized back into the
     full vote account data.

     This poses a problem in the Agave client because upon boot, Agave
     verifies that for all of the vote accounts in the stakes struct, the data
     in the cache is the same as the data in the accounts db.

     The other problem is that the Firedancer stakes cache does not evict old
     entries and doesn't update delegations within the cache. The cache will
     just insert new pubkeys as stake accounts are created/delegated to. To
     make the cache conformant for the snapshot, old accounts should be removed
     from the snapshot and all of the delegations should be updated. */

  /* First populate the vote accounts using the vote accounts/stakes cache.
     We can populate over all of the fields except we can't reserialize the
     vote account data. Instead we will copy over the raw contents of all of
     the vote accounts. */

  /* NOTE(review): 15000 appears to be a floor on the pool size so the map can
     absorb growth — confirm where this minimum comes from. */
  ulong vote_accounts_len                      = fd_vote_accounts_pair_t_map_size( old_stakes->vote_accounts.vote_accounts_pool, old_stakes->vote_accounts.vote_accounts_root );
  new_stakes->vote_accounts.vote_accounts_pool = fd_vote_accounts_pair_serializable_t_map_alloc( snapshot_ctx->valloc, fd_ulong_max(vote_accounts_len, 15000 ) );
  new_stakes->vote_accounts.vote_accounts_root = NULL;

  for( fd_vote_accounts_pair_t_mapnode_t * n = fd_vote_accounts_pair_t_map_minimum(
    old_stakes->vote_accounts.vote_accounts_pool,
    old_stakes->vote_accounts.vote_accounts_root );
    n;
    n = fd_vote_accounts_pair_t_map_successor( old_stakes->vote_accounts.vote_accounts_pool, n ) ) {

    fd_vote_accounts_pair_serializable_t_mapnode_t * new_node = fd_vote_accounts_pair_serializable_t_map_acquire( new_stakes->vote_accounts.vote_accounts_pool );
    new_node->elem.key   = n->elem.key;
    new_node->elem.stake = n->elem.stake;
    /* Now to populate the value, lookup the account using the acc mgr.
       The raw account bytes are copied verbatim so they round-trip through
       Agave's cache-vs-accounts-db consistency check. */
    FD_BORROWED_ACCOUNT_DECL( vote_acc );
    int err = fd_acc_mgr_view( snapshot_ctx->acc_mgr, NULL, &n->elem.key, vote_acc );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Failed to view vote account from stakes cache %s", FD_BASE58_ENC_32_ALLOCA(&n->elem.key) ));
    }

    new_node->elem.value.lamports   = vote_acc->const_meta->info.lamports;
    new_node->elem.value.data_len   = vote_acc->const_meta->dlen;
    new_node->elem.value.data       = fd_valloc_malloc( snapshot_ctx->valloc, 8UL, vote_acc->const_meta->dlen );
    fd_memcpy( new_node->elem.value.data, vote_acc->const_data, vote_acc->const_meta->dlen );
    fd_memcpy( &new_node->elem.value.owner, &vote_acc->const_meta->info.owner, sizeof(fd_pubkey_t) );
    new_node->elem.value.executable = vote_acc->const_meta->info.executable;
    new_node->elem.value.rent_epoch = vote_acc->const_meta->info.rent_epoch;
    fd_vote_accounts_pair_serializable_t_map_insert( new_stakes->vote_accounts.vote_accounts_pool, &new_stakes->vote_accounts.vote_accounts_root, new_node );

  }

  /* Stale stake delegations should also be removed or updated in the cache.
     TODO: This will likely be changed in the near future as the stake
     program is migrated to a bpf program. It will likely be replaced by an
     index of stake/vote accounts. */

  /* The successor (nn) is captured before a node may be removed so the
     traversal stays valid while evicting. */
  FD_BORROWED_ACCOUNT_DECL( stake_acc );
  fd_delegation_pair_t_mapnode_t * nn = NULL;
  for( fd_delegation_pair_t_mapnode_t * n = fd_delegation_pair_t_map_minimum(
      old_stakes->stake_delegations_pool, old_stakes->stake_delegations_root ); n; n=nn ) {

    nn = fd_delegation_pair_t_map_successor( old_stakes->stake_delegations_pool, n );

    int err = fd_acc_mgr_view( snapshot_ctx->acc_mgr, NULL, &n->elem.account, stake_acc );
    if( FD_UNLIKELY( err ) ) {
      /* If the stake account doesn't exist, the cache is stale and the entry
         just needs to be evicted. */
      fd_delegation_pair_t_map_remove( old_stakes->stake_delegations_pool, &old_stakes->stake_delegations_root, n );
      fd_delegation_pair_t_map_release( old_stakes->stake_delegations_pool, n );
    } else {
      /* Otherwise, just update the delegation in case it is stale. */
      fd_bincode_decode_ctx_t ctx = {
        .data    = stake_acc->const_data,
        .dataend = stake_acc->const_data + stake_acc->const_meta->dlen,
        .valloc  = snapshot_ctx->valloc
      };
      fd_stake_state_v2_t stake_state = {0};
      err = fd_stake_state_v2_decode( &stake_state, &ctx );
      if( FD_UNLIKELY( err ) ) {
        FD_LOG_ERR(( "Failed to decode stake state" ));
      }
      /* NOTE(review): the decoded state's discriminant is not checked here —
         this assumes every cached delegation's account decodes to the
         "stake" variant; confirm uninitialized/initialized variants cannot
         appear in the delegations pool. */
      n->elem.delegation = stake_state.inner.stake.stake.delegation;
    }
  }

  /* Copy over the rest of the fields as they are the same. Note that the
     delegation pool/root are shared with old_stakes, not deep-copied. */

  new_stakes->stake_delegations_pool = old_stakes->stake_delegations_pool;
  new_stakes->stake_delegations_root = old_stakes->stake_delegations_root;
  new_stakes->unused                 = old_stakes->unused;
  new_stakes->epoch                  = old_stakes->epoch;
  new_stakes->stake_history          = old_stakes->stake_history;

}
595 :
596 : static inline void
597 : fd_snapshot_create_populate_bank( fd_snapshot_ctx_t * snapshot_ctx,
598 0 : fd_serializable_versioned_bank_t * bank ) {
599 :
600 0 : fd_slot_bank_t * slot_bank = &snapshot_ctx->slot_bank;
601 0 : fd_epoch_bank_t * epoch_bank = &snapshot_ctx->epoch_bank;
602 :
603 : /* The blockhash queue has to be copied over along with all of its entries.
604 : As a note, the size is 300 but in fact is of size 301 due to a knwon bug
605 : in the agave client that is emulated by the firedancer client. */
606 :
607 0 : bank->blockhash_queue.last_hash_index = slot_bank->block_hash_queue.last_hash_index;
608 0 : bank->blockhash_queue.last_hash = fd_valloc_malloc( snapshot_ctx->valloc, FD_HASH_ALIGN, FD_HASH_FOOTPRINT );
609 0 : fd_memcpy( bank->blockhash_queue.last_hash, slot_bank->block_hash_queue.last_hash, sizeof(fd_hash_t) );
610 :
611 0 : bank->blockhash_queue.ages_len = fd_hash_hash_age_pair_t_map_size( slot_bank->block_hash_queue.ages_pool, slot_bank->block_hash_queue.ages_root);
612 0 : bank->blockhash_queue.ages = fd_valloc_malloc( snapshot_ctx->valloc, FD_HASH_HASH_AGE_PAIR_ALIGN, bank->blockhash_queue.ages_len * sizeof(fd_hash_hash_age_pair_t) );
613 0 : bank->blockhash_queue.max_age = FD_BLOCKHASH_QUEUE_SIZE;
614 :
615 0 : fd_block_hash_queue_t * queue = &slot_bank->block_hash_queue;
616 0 : fd_hash_hash_age_pair_t_mapnode_t * nn = NULL;
617 0 : ulong blockhash_queue_idx = 0UL;
618 0 : for( fd_hash_hash_age_pair_t_mapnode_t * n = fd_hash_hash_age_pair_t_map_minimum( queue->ages_pool, queue->ages_root ); n; n = nn ) {
619 0 : nn = fd_hash_hash_age_pair_t_map_successor( queue->ages_pool, n );
620 0 : fd_memcpy( &bank->blockhash_queue.ages[ blockhash_queue_idx++ ], &n->elem, sizeof(fd_hash_hash_age_pair_t) );
621 0 : }
622 :
623 :
624 :
625 : /* Ancestor can be omitted to boot off of for both clients */
626 :
627 0 : bank->ancestors_len = 0UL;
628 0 : bank->ancestors = NULL;
629 :
630 0 : bank->hash = slot_bank->banks_hash;
631 0 : bank->parent_hash = slot_bank->prev_banks_hash;
632 0 : bank->parent_slot = slot_bank->prev_slot;
633 0 : bank->hard_forks = slot_bank->hard_forks;
634 0 : bank->transaction_count = slot_bank->transaction_count;
635 0 : bank->signature_count = slot_bank->parent_signature_cnt;
636 0 : bank->capitalization = slot_bank->capitalization;
637 0 : bank->tick_height = slot_bank->tick_height;
638 0 : bank->max_tick_height = slot_bank->max_tick_height;
639 :
640 : /* The hashes_per_tick needs to be copied over from the epoch bank because
641 : the pointer could go out of bounds during an epoch boundary. */
642 0 : bank->hashes_per_tick = fd_valloc_malloc( snapshot_ctx->valloc, alignof(ulong), sizeof(ulong) );
643 0 : fd_memcpy( bank->hashes_per_tick, &epoch_bank->hashes_per_tick, sizeof(ulong) );
644 :
645 0 : bank->ticks_per_slot = FD_TICKS_PER_SLOT;
646 0 : bank->ns_per_slot = epoch_bank->ns_per_slot;
647 0 : bank->genesis_creation_time = epoch_bank->genesis_creation_time;
648 0 : bank->slots_per_year = epoch_bank->slots_per_year;
649 :
650 : /* This value can be set to 0 because the Agave client recomputes this value
651 : and the firedancer client doesn't use it. */
652 :
653 0 : bank->accounts_data_len = 0UL;
654 :
655 0 : bank->slot = snapshot_ctx->slot;
656 0 : bank->epoch = fd_slot_to_epoch( &epoch_bank->epoch_schedule, bank->slot, NULL );
657 0 : bank->block_height = slot_bank->block_height;
658 :
659 : /* Collector id can be left as null for both clients */
660 :
661 0 : fd_memset( &bank->collector_id, 0, sizeof(fd_pubkey_t) );
662 :
663 0 : bank->collector_fees = slot_bank->collected_execution_fees + slot_bank->collected_priority_fees;
664 0 : bank->fee_calculator.lamports_per_signature = slot_bank->lamports_per_signature;
665 0 : bank->fee_rate_governor = slot_bank->fee_rate_governor;
666 0 : bank->collected_rent = slot_bank->collected_rent;
667 :
668 0 : bank->rent_collector.epoch = bank->epoch;
669 0 : bank->rent_collector.epoch_schedule = epoch_bank->rent_epoch_schedule;
670 0 : bank->rent_collector.slots_per_year = epoch_bank->slots_per_year;
671 0 : bank->rent_collector.rent = epoch_bank->rent;
672 :
673 0 : bank->epoch_schedule = epoch_bank->epoch_schedule;
674 0 : bank->inflation = epoch_bank->inflation;
675 :
676 : /* Unused accounts can be left as NULL for both clients. */
677 :
678 0 : fd_memset( &bank->unused_accounts, 0, sizeof(fd_unused_accounts_t) );
679 :
680 : /* We need to copy over the stakes for two epochs despite the Agave client
681 : providing the stakes for 6 epochs. These stakes need to be copied over
682 : because of the fact that the leader schedule computation uses the two
683 : previous epoch stakes.
684 :
685 : TODO: This field has been deprecated by agave and has instead been
686 : replaced with the versioned epoch stakes field in the manifest. The
687 : firedancer client will populate the deprecated field. */
688 :
689 0 : fd_epoch_epoch_stakes_pair_t * relevant_epoch_stakes = fd_valloc_malloc( snapshot_ctx->valloc, FD_EPOCH_EPOCH_STAKES_PAIR_ALIGN, 2UL * sizeof(fd_epoch_epoch_stakes_pair_t) );
690 0 : fd_memset( &relevant_epoch_stakes[0], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
691 0 : fd_memset( &relevant_epoch_stakes[1], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
692 0 : relevant_epoch_stakes[0].key = bank->epoch;
693 0 : relevant_epoch_stakes[0].value.stakes.vote_accounts = slot_bank->epoch_stakes;
694 0 : relevant_epoch_stakes[1].key = bank->epoch+1UL;
695 0 : relevant_epoch_stakes[1].value.stakes.vote_accounts = epoch_bank->next_epoch_stakes;
696 :
697 0 : bank->epoch_stakes_len = 2UL;
698 0 : bank->epoch_stakes = relevant_epoch_stakes;
699 0 : bank->is_delta = snapshot_ctx->is_incremental;
700 :
701 : /* The firedancer runtime currently maintains a version of the stakes which
702 : can't be reserialized into a format that is compatible with the Solana
703 : snapshot format. Therefore, we must recompute the data structure using
704 : the pubkeys from the stakes cache that is currently in the epoch context. */
705 :
706 0 : fd_snapshot_create_serialiable_stakes( snapshot_ctx, &epoch_bank->stakes, &bank->stakes );
707 :
708 0 : }
709 :
/* fd_snapshot_create_setup_and_validate_ctx prepares the snapshot context
   for snapshot creation: it initializes the account manager, decodes the
   epoch bank and slot bank records out of funk into the context, sanity
   checks the context fields, and truncates the two scratch files used
   during snapshot creation.  Every failure aborts the process via
   FD_LOG_ERR. */
static inline void
fd_snapshot_create_setup_and_validate_ctx( fd_snapshot_ctx_t * snapshot_ctx ) {

  fd_funk_t * funk = snapshot_ctx->funk;

  /* Initialize the account manager. */

  uchar * mem = fd_valloc_malloc( snapshot_ctx->valloc, FD_ACC_MGR_ALIGN, FD_ACC_MGR_FOOTPRINT );
  snapshot_ctx->acc_mgr = fd_acc_mgr_new( mem, funk );
  if( FD_UNLIKELY( !snapshot_ctx->acc_mgr ) ) {
    FD_LOG_ERR(( "Failed to initialize account manager" ));
  }

  /* First the epoch bank.  The funk record stores a uint magic (encoding
     format tag) followed by the bincode-encoded bank. */

  fd_funk_rec_key_t epoch_id = fd_runtime_epoch_bank_key();
  fd_funk_rec_t const * epoch_rec = fd_funk_rec_query( funk, NULL, &epoch_id );
  if( FD_UNLIKELY( !epoch_rec ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: missing record" ));
  }
  void * epoch_val = fd_funk_val( epoch_rec, fd_funk_wksp( funk ) );

  /* The record must be large enough to at least hold the format magic. */

  if( FD_UNLIKELY( fd_funk_val_sz( epoch_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: empty record" ));
  }

  uint epoch_magic = *(uint*)epoch_val;

  /* Decode everything after the leading magic. */

  fd_bincode_decode_ctx_t epoch_decode_ctx = {
    .data = (uchar*)epoch_val + sizeof(uint),
    .dataend = (uchar*)epoch_val + fd_funk_val_sz( epoch_rec ),
    .valloc = snapshot_ctx->valloc
  };

  if( FD_UNLIKELY( epoch_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Epoch bank record has wrong magic" ));
  }

  int err = fd_epoch_bank_decode( &snapshot_ctx->epoch_bank, &epoch_decode_ctx );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode epoch bank" ));
  }

  /* Now the slot bank.  Same record layout: a uint magic followed by the
     bincode-encoded bank. */

  fd_funk_rec_key_t slot_id = fd_runtime_slot_bank_key();
  fd_funk_rec_t const * slot_rec = fd_funk_rec_query( funk, NULL, &slot_id );
  if( FD_UNLIKELY( !slot_rec ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: missing record" ));
  }
  void * slot_val = fd_funk_val( slot_rec, fd_funk_wksp( funk ) );

  if( FD_UNLIKELY( fd_funk_val_sz( slot_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: empty record" ));
  }

  uint slot_magic = *(uint*)slot_val;

  fd_bincode_decode_ctx_t slot_decode_ctx = {
    .data = (uchar*)slot_val + sizeof(uint),
    .dataend = (uchar*)slot_val + fd_funk_val_sz( slot_rec ),
    .valloc = snapshot_ctx->valloc
  };

  if( FD_UNLIKELY( slot_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Slot bank record has wrong magic" ));
  }

  err = fd_slot_bank_decode( &snapshot_ctx->slot_bank, &slot_decode_ctx );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode slot bank" ));
  }

  /* Validate that the snapshot context is setup correctly */

  if( FD_UNLIKELY( !snapshot_ctx->out_dir ) ) {
    FD_LOG_ERR(( "Snapshot directory is not set" ));
  }

  /* A snapshot can only be produced for a slot at or behind the slot bank's
     current slot. */

  if( FD_UNLIKELY( snapshot_ctx->slot>snapshot_ctx->slot_bank.slot ) ) {
    FD_LOG_ERR(( "Snapshot slot=%lu is greater than the current slot=%lu",
                 snapshot_ctx->slot, snapshot_ctx->slot_bank.slot ));
  }

  /* Truncate the two files used for snapshot creation and seek to its start. */

  /* lseek to offset 0 with SEEK_SET returns 0 on success; any other value
     (including the -1 error return) is treated as a failure here. */

  long seek = lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->tmp_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the temporary file" ));
  }

  seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->snapshot_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the snapshot file" ));
  }

}
815 :
816 : static inline void
817 0 : fd_snapshot_create_setup_writer( fd_snapshot_ctx_t * snapshot_ctx ) {
818 :
819 : /* Setup a tar writer. */
820 :
821 0 : uchar * writer_mem = fd_valloc_malloc( snapshot_ctx->valloc, fd_tar_writer_align(), fd_tar_writer_footprint() );
822 0 : snapshot_ctx->writer = fd_tar_writer_new( writer_mem, snapshot_ctx->tmp_fd );
823 0 : if( FD_UNLIKELY( !snapshot_ctx->writer ) ) {
824 0 : FD_LOG_ERR(( "Unable to create a tar writer" ));
825 0 : }
826 0 : }
827 :
828 : static inline void
829 0 : fd_snapshot_create_write_version( fd_snapshot_ctx_t * snapshot_ctx ) {
830 :
831 : /* The first file in the tar archive should be the version file.. */
832 :
833 0 : int err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_VERSION_FILE );
834 0 : if( FD_UNLIKELY( err ) ) {
835 0 : FD_LOG_ERR(( "Failed to create the version file" ));
836 0 : }
837 :
838 0 : err = fd_tar_writer_write_file_data( snapshot_ctx->writer, FD_SNAPSHOT_VERSION, FD_SNAPSHOT_VERSION_LEN);
839 0 : if( FD_UNLIKELY( err ) ) {
840 0 : FD_LOG_ERR(( "Failed to create the version file" ));
841 0 : }
842 :
843 0 : err = fd_tar_writer_fini_file( snapshot_ctx->writer );
844 0 : if( FD_UNLIKELY( err ) ) {
845 0 : FD_LOG_ERR(( "Failed to create the version file" ));
846 0 : }
847 :
848 0 : }
849 :
850 : static inline void
851 0 : fd_snapshot_create_write_status_cache( fd_snapshot_ctx_t * snapshot_ctx ) {
852 :
853 0 : FD_SCRATCH_SCOPE_BEGIN {
854 :
855 : /* First convert the existing status cache into a snapshot-friendly format. */
856 :
857 0 : fd_bank_slot_deltas_t slot_deltas_new = {0};
858 0 : int err = fd_txncache_get_entries( snapshot_ctx->status_cache,
859 0 : &slot_deltas_new );
860 0 : if( FD_UNLIKELY( err ) ) {
861 0 : FD_LOG_ERR(( "Failed to get entries from the status cache" ));
862 0 : }
863 0 : ulong bank_slot_deltas_sz = fd_bank_slot_deltas_size( &slot_deltas_new );
864 0 : uchar * out_status_cache = fd_valloc_malloc( snapshot_ctx->valloc,
865 0 : FD_BANK_SLOT_DELTAS_ALIGN,
866 0 : bank_slot_deltas_sz );
867 0 : fd_bincode_encode_ctx_t encode_status_cache = {
868 0 : .data = out_status_cache,
869 0 : .dataend = out_status_cache + bank_slot_deltas_sz,
870 0 : };
871 0 : if( FD_UNLIKELY( fd_bank_slot_deltas_encode( &slot_deltas_new, &encode_status_cache ) ) ) {
872 0 : FD_LOG_ERR(( "Failed to encode the status cache" ));
873 0 : }
874 :
875 : /* Now write out the encoded buffer to the tar archive. */
876 :
877 0 : err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_STATUS_CACHE_FILE );
878 0 : if( FD_UNLIKELY( err ) ) {
879 0 : FD_LOG_ERR(( "Failed to create the status cache file" ));
880 0 : }
881 0 : err = fd_tar_writer_write_file_data( snapshot_ctx->writer, out_status_cache, bank_slot_deltas_sz );
882 0 : if( FD_UNLIKELY( err ) ) {
883 0 : FD_LOG_ERR(( "Failed to create the status cache file" ));
884 0 : }
885 0 : err = fd_tar_writer_fini_file( snapshot_ctx->writer );
886 0 : if( FD_UNLIKELY( err ) ) {
887 0 : FD_LOG_ERR(( "Failed to create the status cache file" ));
888 0 : }
889 :
890 : /* Registers all roots and unconstipates the status cache. */
891 :
892 0 : fd_txncache_flush_constipated_slots( snapshot_ctx->status_cache );
893 :
894 0 : fd_valloc_free( snapshot_ctx->valloc, out_status_cache );
895 :
896 0 : } FD_SCRATCH_SCOPE_END;
897 :
898 0 : }
899 :
/* fd_snapshot_create_write_manifest_and_acc_vecs builds the snapshot
   manifest (bank fields plus the append vec index), writes out the account
   files, back-fills the manifest into the space reserved for it in the tar
   archive, and then tears down the per-snapshot heap allocations.  For a
   full snapshot, the accounts hash and capitalization are returned to the
   caller through out_hash / out_capitalization. */
static inline void
fd_snapshot_create_write_manifest_and_acc_vecs( fd_snapshot_ctx_t * snapshot_ctx,
                                                fd_hash_t * out_hash,
                                                ulong * out_capitalization ) {


  fd_solana_manifest_serializable_t manifest = {0};

  /* Copy in all the fields of the bank. */

  fd_snapshot_create_populate_bank( snapshot_ctx, &manifest.bank );

  /* Populate the rest of the manifest, except for the append vec index. */

  manifest.lamports_per_signature = snapshot_ctx->slot_bank.lamports_per_signature;
  manifest.epoch_account_hash = &snapshot_ctx->slot_bank.epoch_account_hash;

  /* FIXME: The versioned epoch stakes needs to be implemented. Right now if
     we try to create a snapshot on or near an epoch boundary, we will produce
     an invalid snapshot. */

  manifest.versioned_epoch_stakes_len = 0UL;
  manifest.versioned_epoch_stakes = NULL;

  /* Populate the append vec index and write out the corresponding acc files. */

  ulong incr_capitalization = 0UL;
  fd_snapshot_create_populate_acc_vecs( snapshot_ctx, &manifest, snapshot_ctx->writer, &incr_capitalization );

  /* Once the append vec index is populated and the hashes are calculated,
     propagate the hashes to the correct fields. As a note, the last_snap_hash
     is the full snapshot's account hash.

     NOTE(review): bank_incremental_snapshot_persistence is dereferenced
     below; it is presumably allocated inside
     fd_snapshot_create_populate_acc_vecs when is_incremental is set (it is
     freed at the bottom of this function) -- confirm. */

  if( snapshot_ctx->is_incremental ) {
    manifest.bank_incremental_snapshot_persistence->full_slot = snapshot_ctx->last_snap_slot;
    fd_memcpy( &manifest.bank_incremental_snapshot_persistence->full_hash, snapshot_ctx->last_snap_acc_hash, sizeof(fd_hash_t) );
    manifest.bank_incremental_snapshot_persistence->full_capitalization = snapshot_ctx->last_snap_capitalization;
    manifest.bank_incremental_snapshot_persistence->incremental_hash = snapshot_ctx->acc_hash;
    manifest.bank_incremental_snapshot_persistence->incremental_capitalization = incr_capitalization;
  } else {
    memcpy( out_hash, &manifest.accounts_db.bank_hash_info.accounts_hash, sizeof(fd_hash_t) );
    *out_capitalization = snapshot_ctx->slot_bank.capitalization;
  }

  /* At this point, all of the account files are written out and the append
     vec index is populated in the manifest. We have already reserved space
     in the archive for the manifest. All we need to do now is encode the
     manifest and write it in. */

  ulong manifest_sz = fd_solana_manifest_serializable_size( &manifest );
  uchar * out_manifest = fd_valloc_malloc( snapshot_ctx->valloc, FD_SOLANA_MANIFEST_SERIALIZABLE_ALIGN, manifest_sz );

  fd_bincode_encode_ctx_t encode = {
    .data = out_manifest,
    .dataend = out_manifest + manifest_sz
  };

  int err = fd_solana_manifest_serializable_encode( &manifest, &encode );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Failed to encode the manifest" ));
  }

  /* fd_tar_writer_fill_space back-fills the previously reserved region of
     the archive with the encoded manifest. */

  err = fd_tar_writer_fill_space( snapshot_ctx->writer, out_manifest, manifest_sz );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Failed to write out the manifest" ));
  }

  void * mem = fd_tar_writer_delete( snapshot_ctx->writer );
  if( FD_UNLIKELY( !mem ) ) {
    FD_LOG_ERR(( "Unable to delete the tar writer" ));
  }

  fd_bincode_destroy_ctx_t destroy = {
    .valloc = snapshot_ctx->valloc
  };

  /* This is kind of a hack but we need to do this so we don't accidentally
     corrupt memory when we try to double destroy. Everything below is
     things that aren't stack allocated from the manifest including the banks. */

  fd_stakes_serializable_destroy( &manifest.bank.stakes, &destroy );
  fd_block_hash_vec_destroy( &manifest.bank.blockhash_queue, &destroy );
  fd_valloc_free( snapshot_ctx->valloc, manifest.bank.epoch_stakes );
  fd_epoch_bank_destroy( &snapshot_ctx->epoch_bank, &destroy );
  fd_slot_bank_destroy( &snapshot_ctx->slot_bank, &destroy );
  if( snapshot_ctx->is_incremental ) {
    fd_valloc_free( snapshot_ctx->valloc, manifest.bank_incremental_snapshot_persistence );
  }
  fd_valloc_free( snapshot_ctx->valloc, out_manifest );

}
991 :
992 : static inline void
993 0 : fd_snapshot_create_compress( fd_snapshot_ctx_t * snapshot_ctx ) {
994 :
995 : /* Compress the file using zstd. First open the non-compressed file and
996 : create a file for the compressed file. The reason why we can't do this
997 : as we stream out the snapshot archive is that we write back into the
998 : manifest buffer.
999 :
1000 : TODO: A way to eliminate this and to just stream out
1001 : 1 compressed file would be to totally precompute the index such that
1002 : we don't have to write back into funk.
1003 :
1004 : TODO: Currently, the snapshot service interfaces directly with the zstd
1005 : library but a generalized cstream defined in fd_zstd should be used
1006 : instead. */
1007 :
1008 0 : ulong in_buf_sz = ZSTD_CStreamInSize();
1009 0 : ulong zstd_buf_sz = ZSTD_CStreamOutSize();
1010 0 : ulong out_buf_sz = ZSTD_CStreamOutSize();
1011 :
1012 0 : char * in_buf = fd_valloc_malloc( snapshot_ctx->valloc, FD_ZSTD_CSTREAM_ALIGN, in_buf_sz );
1013 0 : char * zstd_buf = fd_valloc_malloc( snapshot_ctx->valloc, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
1014 0 : char * out_buf = fd_valloc_malloc( snapshot_ctx->valloc, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
1015 :
1016 : /* Reopen the tarball and open/overwrite the filename for the compressed,
1017 : finalized full snapshot. Setup the zstd compression stream. */
1018 :
1019 0 : int err = 0;
1020 :
1021 0 : ZSTD_CStream * cstream = ZSTD_createCStream();
1022 0 : if( FD_UNLIKELY( !cstream ) ) {
1023 0 : FD_LOG_ERR(( "Failed to create the zstd compression stream" ));
1024 0 : }
1025 0 : ZSTD_initCStream( cstream, ZSTD_CLEVEL_DEFAULT );
1026 :
1027 0 : fd_io_buffered_ostream_t ostream[1];
1028 :
1029 0 : if( FD_UNLIKELY( !fd_io_buffered_ostream_init( ostream, snapshot_ctx->snapshot_fd, out_buf, out_buf_sz ) ) ) {
1030 0 : FD_LOG_ERR(( "Failed to initialize the ostream" ));
1031 0 : }
1032 :
1033 0 : long seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
1034 0 : if( FD_UNLIKELY( seek!=0L ) ) {
1035 0 : FD_LOG_ERR(( "Failed to seek to the start of the file" ));
1036 0 : }
1037 :
1038 : /* At this point, the tar archive and the new zstd file is open. The zstd
1039 : streamer is still open. Now, we are ready to read in bytes and stream
1040 : compress them. We will keep going until we see an EOF in a tar archive. */
1041 :
1042 0 : ulong in_sz = in_buf_sz;
1043 :
1044 0 : ulong off = (ulong)lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
1045 0 : if( FD_UNLIKELY( off ) ) {
1046 0 : FD_LOG_ERR(( "Failed to seek to the beginning of the file" ));
1047 0 : }
1048 :
1049 0 : while( in_sz==in_buf_sz ) {
1050 :
1051 : /* Read chunks from the file. There isn't really a need to use a streamed
1052 : reader here because we will read in the max size buffer for every single
1053 : file read except for the very last one.
1054 :
1055 : in_sz will only not equal in_buf_sz on the last read. */
1056 0 : err = fd_io_read( snapshot_ctx->tmp_fd, in_buf, 0UL, in_buf_sz, &in_sz );
1057 0 : if( FD_UNLIKELY( err ) ) {
1058 0 : FD_LOG_ERR(( "Failed to read in the file" ));
1059 0 : }
1060 :
1061 : /* Compress the in memory buffer and add it to the output stream. */
1062 :
1063 0 : ZSTD_inBuffer input = { in_buf, in_sz, 0UL };
1064 0 : while( input.pos<input.size ) {
1065 0 : ZSTD_outBuffer output = { zstd_buf, zstd_buf_sz, 0UL };
1066 0 : ulong ret = ZSTD_compressStream( cstream, &output, &input );
1067 :
1068 0 : if( FD_UNLIKELY( ZSTD_isError( ret ) ) ) {
1069 0 : FD_LOG_ERR(( "Compression error: %s\n", ZSTD_getErrorName( ret ) ));
1070 0 : }
1071 :
1072 0 : err = fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
1073 0 : if( FD_UNLIKELY( err ) ) {
1074 0 : FD_LOG_ERR(( "Failed to write out the compressed file" ));
1075 0 : }
1076 0 : }
1077 0 : }
1078 :
1079 : /* Now flush any bytes left in the zstd buffer, cleanup open file
1080 : descriptors, and deinit any data structures. */
1081 :
1082 0 : ZSTD_outBuffer output = { zstd_buf, zstd_buf_sz, 0UL };
1083 0 : ulong remaining = ZSTD_endStream( cstream, &output );
1084 :
1085 0 : if( FD_UNLIKELY( ZSTD_isError( remaining ) ) ) {
1086 0 : FD_LOG_ERR(( "Unable to end the zstd stream" ));
1087 0 : }
1088 0 : if( output.pos>0UL ) {
1089 0 : fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
1090 0 : }
1091 :
1092 0 : fd_valloc_free( snapshot_ctx->valloc, in_buf );
1093 0 : fd_valloc_free( snapshot_ctx->valloc, zstd_buf );
1094 0 : fd_valloc_free( snapshot_ctx->valloc, out_buf );
1095 :
1096 0 : ZSTD_freeCStream( cstream ); /* Works even if cstream is null */
1097 0 : err = fd_io_buffered_ostream_flush( ostream );
1098 0 : if( FD_UNLIKELY( err ) ) {
1099 0 : FD_LOG_ERR(( "Failed to flush the ostream" ));
1100 0 : }
1101 :
1102 : /* Assuming that there was a successful write, make the compressed
1103 : snapshot file readable and servable. */
1104 :
1105 0 : char tmp_directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
1106 0 : err = snprintf( tmp_directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/%s", snapshot_ctx->out_dir, snapshot_ctx->is_incremental ? FD_SNAPSHOT_TMP_INCR_ARCHIVE_ZSTD : FD_SNAPSHOT_TMP_FULL_ARCHIVE_ZSTD );
1107 0 : if( FD_UNLIKELY( err<0 ) ) {
1108 0 : FD_LOG_ERR(( "Failed to format directory string" ));
1109 0 : }
1110 :
1111 0 : char directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
1112 0 : if( !snapshot_ctx->is_incremental ) {
1113 0 : err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/snapshot-%lu-%s.tar.zst",
1114 0 : snapshot_ctx->out_dir, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
1115 0 : } else {
1116 0 : err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/incremental-snapshot-%lu-%lu-%s.tar.zst",
1117 0 : snapshot_ctx->out_dir, snapshot_ctx->last_snap_slot, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
1118 0 : }
1119 :
1120 0 : if( FD_UNLIKELY( err<0 ) ) {
1121 0 : FD_LOG_ERR(( "Failed to format directory string" ));
1122 0 : }
1123 :
1124 0 : err = rename( tmp_directory_buf_zstd, directory_buf_zstd );
1125 0 : if( FD_UNLIKELY( err<0 ) ) {
1126 0 : FD_LOG_ERR(( "Failed to rename file from %s to %s (%i-%s)", tmp_directory_buf_zstd, directory_buf_zstd, errno, fd_io_strerror( errno ) ));
1127 0 : }
1128 :
1129 0 : }
1130 :
1131 : void
1132 : fd_snapshot_create_new_snapshot( fd_snapshot_ctx_t * snapshot_ctx,
1133 : fd_hash_t * out_hash,
1134 0 : ulong * out_capitalization ) {
1135 :
1136 0 : FD_SCRATCH_SCOPE_BEGIN {
1137 :
1138 0 : FD_LOG_NOTICE(( "Starting to produce a snapshot for slot=%lu in directory=%s", snapshot_ctx->slot, snapshot_ctx->out_dir ));
1139 :
1140 : /* Validate that the snapshot_ctx is setup correctly. */
1141 :
1142 0 : fd_snapshot_create_setup_and_validate_ctx( snapshot_ctx );
1143 :
1144 : /* Setup the tar archive writer. */
1145 :
1146 0 : fd_snapshot_create_setup_writer( snapshot_ctx );
1147 :
1148 : /* Write out the version file. */
1149 :
1150 0 : fd_snapshot_create_write_version( snapshot_ctx );
1151 :
1152 : /* Dump the status cache and append it to the tar archive. */
1153 :
1154 0 : fd_snapshot_create_write_status_cache( snapshot_ctx );
1155 :
1156 : /* Populate and write out the manifest and append vecs. */
1157 :
1158 0 : fd_snapshot_create_write_manifest_and_acc_vecs( snapshot_ctx, out_hash, out_capitalization );
1159 :
1160 : /* Compress the tar file and write it out to the specified directory. */
1161 :
1162 0 : fd_snapshot_create_compress( snapshot_ctx );
1163 :
1164 0 : FD_LOG_NOTICE(("Finished producing a snapshot" ));
1165 :
1166 0 : } FD_SCRATCH_SCOPE_END;
1167 0 : }
|