LCOV - code coverage report
Current view: top level - flamenco/snapshot - fd_snapshot_create.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 668 0.0 %
Date: 2025-03-20 12:08:36 Functions: 0 11 0.0 %

          Line data    Source code
       1             : #include "fd_snapshot_create.h"
       2             : #include "../runtime/sysvar/fd_sysvar_epoch_schedule.h"
       3             : #include "../../ballet/zstd/fd_zstd.h"
       4             : #include "../runtime/fd_hashes.h"
       5             : #include "../runtime/fd_runtime.h"
       6             : 
       7             : #include <errno.h>
       8             : #include <stdio.h>
       9             : #include <stdlib.h>
      10             : #include <sys/stat.h>
      11             : #include <sys/types.h>
      12             : #include <unistd.h>
      13             : #include <zstd.h>
      14             : 
      15             : static uchar             padding[ FD_SNAPSHOT_ACC_ALIGN ] = {0};
      16             : static fd_account_meta_t default_meta = { .magic = FD_ACCOUNT_META_MAGIC };
      17             : 
      18             : static inline fd_account_meta_t *
      19           0 : fd_snapshot_create_get_default_meta( ulong slot ) {
      20           0 :   default_meta.slot = slot;
      21           0 :   return &default_meta;
      22           0 : }
      23             : 
      24             : static inline void
      25             : fd_snapshot_create_populate_acc_vecs( fd_snapshot_ctx_t *    snapshot_ctx,
      26             :                                       fd_solana_manifest_t * manifest,
      27             :                                       fd_tar_writer_t *      writer,
      28           0 :                                       ulong *                out_cap ) {
      29             : 
      30             :   /* The append vecs need to be described in an index in the manifest so a
      31             :      reader knows what account files to look for. These files are technically
      32             :      slot indexed, but the Firedancer implementation of the Solana snapshot
      33             :      produces far fewer indices. These storages are for the accounts
      34             :      that were modified and deleted in the most recent slot because that
      35             :      information is used by the Agave client to calculate and verify the
      36             :      bank hash for the given slot. This is done as an optimization to avoid
      37             :      having to slot index the Firedancer accounts db which would incur a large
      38             :      performance hit.
      39             : 
      40             :      To avoid iterating through the root twice to determine what accounts were
      41             :      touched in the snapshot slot and what accounts were touched in the
      42             :      other slots, we will create an array of pubkey pointers for all accounts
      43             :      that were touched in the snapshot slot. This buffer can be safely sized to
      44             :      the maximum amount of writable accounts that are possible in a non-epoch
      45             :      boundary slot. The rationale for this bound is explained in fd_runtime.h.
      46             :      We will not attempt to create a snapshot on an epoch boundary.
      47             : 
      48             :      TODO: We must add compaction here. */
      49             : 
      50           0 :   fd_pubkey_t * * snapshot_slot_keys    = fd_spad_alloc( snapshot_ctx->spad, alignof(fd_pubkey_t*), sizeof(fd_pubkey_t*) * FD_WRITABLE_ACCS_IN_SLOT );
      51           0 :   ulong           snapshot_slot_key_cnt = 0UL;
      52             : 
      53             :   /* We will dynamically resize the number of incremental keys because the upper
      54             :      bound will be roughly 8 bytes * writable accs in a slot * number of slots
      55             :      since the last full snapshot which can quickly grow to be severalgigabytes
      56             :      or more. In the normal case, this won't require dynamic resizing. */
      57           0 :   #define FD_INCREMENTAL_KEY_INIT_BOUND (100000UL)
      58           0 :   ulong                       incremental_key_bound = FD_INCREMENTAL_KEY_INIT_BOUND;
      59           0 :   ulong                       incremental_key_cnt   = 0UL;
      60           0 :   fd_funk_rec_key_t const * * incremental_keys      = snapshot_ctx->is_incremental ?
      61           0 :                                                       fd_spad_alloc( snapshot_ctx->spad, alignof(fd_funk_rec_key_t*), sizeof(fd_funk_rec_key_t*) * incremental_key_bound ) :
      62           0 :                                                       NULL;
      63             : 
      64           0 :   #undef FD_INCREMENTAL_KEY_INIT_BOUND
      65             : 
      66             :   /* In order to size out the accounts DB index in the manifest, we must
      67             :      iterate through funk and accumulate the size of all of the records
      68             :      from all slots before the snapshot_slot. */
      69             : 
      70           0 :   fd_funk_t * funk           = snapshot_ctx->acc_mgr->funk;
      71           0 :   ulong       prev_sz        = 0UL;
      72           0 :   ulong       tombstones_cnt = 0UL;
      73           0 :   for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {
      74             : 
      75           0 :     if( !fd_funk_key_is_acc( rec->pair.key ) ) {
      76           0 :       continue;
      77           0 :     }
      78             : 
      79           0 :     tombstones_cnt++;
      80             : 
      81           0 :     int                 is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
      82           0 :     uchar const *       raw          = fd_funk_val( rec, fd_funk_wksp( funk ) );
      83           0 :     fd_account_meta_t * metadata     = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
      84           0 :                                                       (fd_account_meta_t*)raw;
      85             : 
      86           0 :     if( !metadata ) {
      87           0 :       continue;
      88           0 :     }
      89             : 
      90           0 :     if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
      91           0 :       continue;
      92           0 :     }
      93             : 
      94           0 :     if( snapshot_ctx->is_incremental ) {
      95             :       /* We only care about accounts that were modified since the last
      96             :          snapshot slot for incremental snapshots.
      97             : 
      98             :          We also need to keep track of the capitalization for all of the
      99             :          accounts that are in the incremental as this is verified. */
     100           0 :       if( metadata->slot<=snapshot_ctx->last_snap_slot ) {
     101           0 :         continue;
     102           0 :       }
     103           0 :       incremental_keys[ incremental_key_cnt++ ] = rec->pair.key;
     104           0 :       *out_cap += metadata->info.lamports;
     105             : 
     106           0 :       if( FD_UNLIKELY( incremental_key_cnt==incremental_key_bound ) ) {
     107             :         /* Dynamically resize if needed. */
     108           0 :         incremental_key_bound *= 2UL;
     109           0 :         fd_funk_rec_key_t const * * new_incremental_keys = fd_spad_alloc( snapshot_ctx->spad,
     110           0 :                                                                           alignof(fd_funk_rec_key_t*),
     111           0 :                                                                           sizeof(fd_funk_rec_key_t*) * incremental_key_bound );
     112           0 :         fd_memcpy( new_incremental_keys, incremental_keys, sizeof(fd_funk_rec_key_t*) * incremental_key_cnt );
     113           0 :         fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), incremental_keys );
     114           0 :         incremental_keys = new_incremental_keys;
     115           0 :       }
     116           0 :     }
     117             : 
     118             :     /* We know that all of the accounts from the snapshot slot can fit into
     119             :        one append vec, so we ignore all accounts from the snapshot slot. */
     120             : 
     121           0 :     if( metadata->slot==snapshot_ctx->slot ) {
     122           0 :       continue;
     123           0 :     }
     124             : 
     125           0 :     prev_sz += metadata->dlen + sizeof(fd_solana_account_hdr_t);
     126             : 
     127           0 :   }
     128             : 
     129             :   /* At this point we have sized out all of the relevant accounts that will
     130             :      be included in the snapshot. Now we must populate each of the append vecs
     131             :      and update the index as we go.
     132             : 
     133             :      When we account for the number of slots we need to consider one append vec
     134             :      for the snapshot slot and try to maximally fill up the others: an append
     135             :      vec has a protocol-defined maximum size in Agave.  */
     136             : 
     137           0 :   ulong num_slots = 1UL + prev_sz / FD_SNAPSHOT_APPEND_VEC_SZ_MAX +
     138           0 :                     (prev_sz % FD_SNAPSHOT_APPEND_VEC_SZ_MAX ? 1UL : 0UL);
     139             : 
     140           0 :   fd_solana_accounts_db_fields_t * accounts_db = &manifest->accounts_db;
     141             : 
     142           0 :   accounts_db->storages_len                    = num_slots;
     143           0 :   accounts_db->storages                        = fd_spad_alloc( snapshot_ctx->spad,
     144           0 :                                                                 FD_SNAPSHOT_SLOT_ACC_VECS_ALIGN,
     145           0 :                                                                 sizeof(fd_snapshot_slot_acc_vecs_t) * accounts_db->storages_len );
     146           0 :   accounts_db->version                        = 1UL;
     147           0 :   accounts_db->slot                           = snapshot_ctx->slot;
     148           0 :   accounts_db->historical_roots_len           = 0UL;
     149           0 :   accounts_db->historical_roots               = NULL;
     150           0 :   accounts_db->historical_roots_with_hash_len = 0UL;
     151           0 :   accounts_db->historical_roots_with_hash     = NULL;
     152             : 
     153           0 :   for( ulong i=0UL; i<num_slots; i++ ) {
     154             :     /* Populate the storages for each slot. As a note, the slot number only
     155             :        matters for the snapshot slot. The other slot numbers don't affect
     156             :        consensus at all. Agave also maintains an invariant that there can
     157             :        only be one account vec per storage. */
     158             : 
     159           0 :     accounts_db->storages[ i ].account_vecs_len          = 1UL;
     160           0 :     accounts_db->storages[ i ].account_vecs              = fd_spad_alloc( snapshot_ctx->spad,
     161           0 :                                                                           FD_SNAPSHOT_ACC_VEC_ALIGN,
     162           0 :                                                                           sizeof(fd_snapshot_acc_vec_t) * accounts_db->storages[ i ].account_vecs_len );
     163           0 :     accounts_db->storages[ i ].account_vecs[ 0 ].file_sz = 0UL;
     164           0 :     accounts_db->storages[ i ].account_vecs[ 0 ].id      = i + 1UL;
     165           0 :     accounts_db->storages[ i ].slot                      = snapshot_ctx->slot - i;
     166           0 :   }
     167             : 
     168             :   /* At this point we have iterated through all of the accounts and created
     169             :      the index. We are now ready to generate a snapshot hash. For both
     170             :      snapshots we need to generate two hashes:
     171             :      1. The accounts hash. This is a simple hash of all of the accounts
     172             :         included in the snapshot.
     173             :      2. The snapshot hash. This is a hash of the accounts hash and the epoch
     174             :         account hash. If the EAH is not included, then the accounts hash ==
     175             :         snapshot hash.
     176             : 
     177             :     There is some nuance as to which hash goes where. For full snapshots,
     178             :     the accounts hash in the bank hash info is the accounts hash. The hash in
     179             :     the filename is the snapshot hash.
     180             : 
     181             :     For incremental snapshots, the account hash in the bank hash info field is
     182             :     left zeroed out. The full snapshot's hash is in the incremental persistence
     183             :     field. The incremental snapshot's accounts hash is included in the
     184             :     incremental persistence field. The hash in the filename is the snapshot
     185             :     hash. */
     186             : 
     187           0 :   int err;
     188           0 :   if( !snapshot_ctx->is_incremental ) {
     189             : 
     190           0 :     err = fd_snapshot_service_hash( &snapshot_ctx->acc_hash,
     191           0 :                                     &snapshot_ctx->snap_hash,
     192           0 :                                     &snapshot_ctx->slot_bank,
     193           0 :                                     &snapshot_ctx->epoch_bank,
     194           0 :                                     snapshot_ctx->acc_mgr->funk,
     195           0 :                                     snapshot_ctx->tpool,
     196           0 :                                     snapshot_ctx->spad,
     197           0 :                                     snapshot_ctx->features );
     198           0 :     accounts_db->bank_hash_info.accounts_hash = snapshot_ctx->acc_hash;
     199           0 :   } else {
     200           0 :     err = fd_snapshot_service_inc_hash( &snapshot_ctx->acc_hash,
     201           0 :                                         &snapshot_ctx->snap_hash,
     202           0 :                                         &snapshot_ctx->slot_bank,
     203           0 :                                         &snapshot_ctx->epoch_bank,
     204           0 :                                         snapshot_ctx->acc_mgr->funk,
     205           0 :                                         incremental_keys,
     206           0 :                                         incremental_key_cnt,
     207           0 :                                         snapshot_ctx->spad,
     208           0 :                                         snapshot_ctx->features );
     209           0 :     fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), incremental_keys );
     210             : 
     211           0 :     fd_memset( &accounts_db->bank_hash_info.accounts_hash, 0, sizeof(fd_hash_t) );
     212           0 :   }
     213             : 
     214           0 :   FD_LOG_NOTICE(( "Hashes calculated acc_hash=%s snapshot_hash=%s",
     215           0 :                   FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->acc_hash),
     216           0 :                   FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) ));
     217             : 
     218           0 :   if( FD_UNLIKELY( err ) ) {
     219           0 :     FD_LOG_ERR(( "Unable to calculate snapshot hash" ));
     220           0 :   }
     221             : 
     222           0 :   fd_memset( &accounts_db->bank_hash_info.stats, 0, sizeof(fd_bank_hash_stats_t) );
     223             : 
     224             :   /* Now, we have calculated the relevant hashes for the accounts.
     225             :      Because the files are serially written out for tar and we need to prepend
     226             :      the manifest, we must reserve space in the archive for the solana manifest. */
     227             : 
     228           0 :   if( snapshot_ctx->is_incremental ) {
     229           0 :     manifest->bank_incremental_snapshot_persistence = fd_spad_alloc( snapshot_ctx->spad,
     230           0 :                                                                      FD_BANK_INCREMENTAL_SNAPSHOT_PERSISTENCE_ALIGN,
     231           0 :                                                                      sizeof(fd_bank_incremental_snapshot_persistence_t) );
     232           0 :   }
     233             : 
     234           0 :   ulong manifest_sz = fd_solana_manifest_size( manifest );
     235             : 
     236           0 :   char buffer[ FD_SNAPSHOT_DIR_MAX ];
     237           0 :   err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "snapshots/%lu/%lu", snapshot_ctx->slot, snapshot_ctx->slot );
     238           0 :   if( FD_UNLIKELY( err<0 ) ) {
     239           0 :     FD_LOG_ERR(( "Unable to format manifest name string" ));
     240           0 :   }
     241             : 
     242           0 :   err = fd_tar_writer_new_file( writer, buffer );
     243           0 :   if( FD_UNLIKELY( err ) ) {
     244           0 :     FD_LOG_ERR(( "Unable to create snapshot manifest file" ));
     245           0 :   }
     246             : 
     247             :   /* TODO: We want to eliminate having to write back into the tar file. This
     248             :      will enable the snapshot service to only use one file per snapshot.
     249             :      In order to do this, we must precompute the index in the manifest
     250             :      completely. This will allow us to stream out a compressed snapshot. */
     251             : 
     252           0 :   err = fd_tar_writer_make_space( writer, manifest_sz );
     253           0 :   if( FD_UNLIKELY( err ) ) {
     254           0 :     FD_LOG_ERR(( "Unable to make space for snapshot manifest file" ));
     255           0 :   }
     256             : 
     257           0 :   err = fd_tar_writer_fini_file( writer );
     258           0 :   if( FD_UNLIKELY( err ) ) {
     259           0 :     FD_LOG_ERR(( "Unable to finalize snapshot manifest file" ));
     260           0 :   }
     261             : 
     262             :   /* We have made space for the manifest and are ready to append the append
     263             :      vec files directly into the tar archive. We will iterate through all of
     264             :      the records in the funk root and create/populate an append vec for
     265             :      previous slots. Just record the pubkeys for the latest slot to populate
     266             :      the append vec after. If the append vec is full, write into the next one. */
     267             : 
     268           0 :   ulong curr_slot = 1UL;
     269           0 :   fd_snapshot_acc_vec_t * prev_accs = &accounts_db->storages[ curr_slot ].account_vecs[ 0UL ];
     270             : 
     271           0 :   err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
     272           0 :   if( FD_UNLIKELY( err<0 ) ) {
     273           0 :     FD_LOG_ERR(( "Unable to format previous accounts name string" ));
     274           0 :   }
     275             : 
     276           0 :   err = fd_tar_writer_new_file( writer, buffer );
     277           0 :   if( FD_UNLIKELY( err ) ) {
     278           0 :     FD_LOG_ERR(( "Unable to create previous accounts file" ));
     279           0 :   }
     280             : 
     281           0 :   fd_funk_rec_t * * tombstones = snapshot_ctx->is_incremental ? NULL :
     282           0 :                                  fd_spad_alloc( snapshot_ctx->spad, alignof(fd_funk_rec_t*), sizeof(fd_funk_rec_t*) * tombstones_cnt );
     283           0 :   tombstones_cnt = 0UL;
     284             : 
     285           0 :   for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {
     286             : 
     287             :     /* Get the account data. */
     288             : 
     289           0 :     if( !fd_funk_key_is_acc( rec->pair.key ) ) {
     290           0 :       continue;
     291           0 :     }
     292             : 
     293           0 :     fd_pubkey_t const * pubkey       = fd_type_pun_const( rec->pair.key[0].uc );
     294           0 :     int                 is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
     295           0 :     uchar const *       raw          = fd_funk_val( rec, fd_funk_wksp( funk ) );
     296           0 :     fd_account_meta_t * metadata     = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
     297           0 :                                                       (fd_account_meta_t*)raw;
     298             : 
     299           0 :     if( !snapshot_ctx->is_incremental && is_tombstone ) {
     300             :       /* If we are in a full snapshot, we need to gather all of the accounts
     301             :          that we plan on deleting. */
     302           0 :       tombstones[ tombstones_cnt++ ] = (fd_funk_rec_t*)rec;
     303           0 :     }
     304             : 
     305           0 :     if( !metadata ) {
     306           0 :       continue;
     307           0 :     }
     308             : 
     309           0 :     if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
     310           0 :       continue;
     311           0 :     }
     312             : 
     313             :     /* Don't iterate through accounts that were touched before the last full
     314             :        snapshot. */
     315           0 :     if( snapshot_ctx->is_incremental && metadata->slot<=snapshot_ctx->last_snap_slot ) {
     316           0 :       continue;
     317           0 :     }
     318             : 
     319           0 :     uchar const * acc_data = raw + metadata->hlen;
     320             : 
     321             :     /* All accounts that were touched in the snapshot slot should be in
     322             :        a different append vec so that Agave can calculate the snapshot slot's
     323             :        bank hash. We don't want to include them in an arbitrary append vec. */
     324             : 
     325           0 :     if( metadata->slot==snapshot_ctx->slot ) {
     326           0 :       snapshot_slot_keys[ snapshot_slot_key_cnt++ ] = (fd_pubkey_t*)pubkey;
     327           0 :       continue;
     328           0 :     }
     329             : 
     330             :     /* We don't want to iterate over tombstones if the snapshot is not
     331             :        incremental */
     332           0 :     if( !snapshot_ctx->is_incremental && is_tombstone ) {
     333           0 :       continue;
     334           0 :     }
     335             : 
     336           0 :     ulong new_sz = prev_accs->file_sz + sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
     337             : 
     338           0 :     if( new_sz>FD_SNAPSHOT_APPEND_VEC_SZ_MAX ) {
     339             : 
     340             :       /* When the current append vec is full, finish writing it, start writing
     341             :          into the next append vec. */
     342             : 
     343           0 :       err = fd_tar_writer_fini_file( writer );
     344           0 :       if( FD_UNLIKELY( err ) ) {
     345           0 :         FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
     346           0 :       }
     347             : 
     348           0 :       prev_accs = &accounts_db->storages[ ++curr_slot ].account_vecs[ 0UL ];
     349             : 
     350           0 :       err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
     351           0 :       if( FD_UNLIKELY( err<0 ) ) {
     352           0 :         FD_LOG_ERR(( "Unable to format previous accounts name string" ));
     353           0 :       }
     354             : 
     355           0 :       err = fd_tar_writer_new_file( writer, buffer );
     356           0 :       if( FD_UNLIKELY( err ) ) {
     357           0 :         FD_LOG_ERR(( "Unable to create previous accounts file" ));
     358           0 :       }
     359           0 :     }
     360             : 
     361           0 :     prev_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
     362             : 
     363             : 
     364             :     /* Write out the header. */
     365             : 
     366           0 :     fd_solana_account_hdr_t header = {0};
     367             :     /* Stored meta */
     368           0 :     header.meta.write_version_obsolete = 0UL;
     369           0 :     header.meta.data_len               = metadata->dlen;
     370           0 :     fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
     371             :     /* Account Meta */
     372           0 :     header.info.lamports               = metadata->info.lamports;
     373           0 :     header.info.rent_epoch             = header.info.lamports ? metadata->info.rent_epoch : 0UL;
     374           0 :     fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
     375           0 :     header.info.executable             = metadata->info.executable;
     376             :     /* Hash */
     377           0 :     fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );
     378             : 
     379           0 :     err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
     380           0 :     if( FD_UNLIKELY( err ) ) {
     381           0 :       FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
     382           0 :     }
     383             : 
     384             :     /* Write out the file data. */
     385             : 
     386           0 :     err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
     387           0 :     if( FD_UNLIKELY( err ) ) {
     388           0 :       FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
     389           0 :     }
     390             : 
     391           0 :     ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
     392           0 :     err = fd_tar_writer_write_file_data( writer, padding, align_sz );
     393           0 :     if( FD_UNLIKELY( err ) ) {
     394           0 :       FD_LOG_ERR( ("Unable to stream out account padding to tar archive" ));
     395           0 :     }
     396           0 :   }
     397             : 
     398           0 :   err = fd_tar_writer_fini_file( writer );
     399           0 :   if( FD_UNLIKELY( err ) ) {
     400           0 :     FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
     401           0 :   }
     402             : 
     403             :   /* Now write out the append vec for the snapshot slot. Again, this is needed
     404             :      because the snapshot slot's accounts must be in their append vec in order
     405             :      to verify the bank hash for the snapshot slot in the Agave client. */
     406             : 
     407           0 :   fd_snapshot_acc_vec_t * curr_accs = &accounts_db->storages[ 0UL ].account_vecs[ 0UL ];
     408           0 :   err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot, curr_accs->id );
     409           0 :   if( FD_UNLIKELY( err<0 ) ) {
     410           0 :     FD_LOG_ERR(( "Unable to format current accounts name string" ));
     411           0 :   }
     412             : 
     413           0 :   err = fd_tar_writer_new_file( writer, buffer );
     414           0 :   if( FD_UNLIKELY( err ) ) {
     415           0 :     FD_LOG_ERR(( "Unable to create current accounts file" ));
     416           0 :   }
     417             : 
     418           0 :   for( ulong i=0UL; i<snapshot_slot_key_cnt; i++ ) {
     419             : 
     420           0 :     fd_pubkey_t const * pubkey = snapshot_slot_keys[i];
     421           0 :     fd_funk_rec_key_t key = fd_acc_funk_key( pubkey );
     422             : 
     423           0 :     fd_funk_rec_t const * rec = fd_funk_rec_query( funk, NULL, &key );
     424           0 :     if( FD_UNLIKELY( !rec ) ) {
     425           0 :       FD_LOG_ERR(( "Previously found record can no longer be found" ));
     426           0 :     }
     427             : 
     428           0 :     int                 is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
     429           0 :     uchar       const * raw          = fd_funk_val( rec, fd_funk_wksp( funk ) );
     430           0 :     fd_account_meta_t * metadata     = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
     431           0 :                                                       (fd_account_meta_t*)raw;
     432             : 
     433           0 :     if( FD_UNLIKELY( !metadata ) ) {
     434           0 :       FD_LOG_ERR(( "Record should have non-NULL metadata" ));
     435           0 :     }
     436             : 
     437           0 :     if( FD_UNLIKELY( metadata->magic!=FD_ACCOUNT_META_MAGIC ) ) {
     438           0 :       FD_LOG_ERR(( "Record should have valid magic" ));
     439           0 :     }
     440             : 
     441           0 :     uchar const * acc_data = raw + metadata->hlen;
     442             : 
     443           0 :     curr_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );
     444             : 
     445             :     /* Write out the header. */
     446           0 :     fd_solana_account_hdr_t header = {0};
     447             :     /* Stored meta */
     448           0 :     header.meta.write_version_obsolete = 0UL;
     449           0 :     header.meta.data_len               = metadata->dlen;
     450           0 :     fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
     451             :     /* Account Meta */
     452           0 :     header.info.lamports               = metadata->info.lamports;
     453           0 :     header.info.rent_epoch             = header.info.lamports ? metadata->info.rent_epoch : 0UL;
     454           0 :     fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
     455           0 :     header.info.executable             = metadata->info.executable;
     456             :     /* Hash */
     457           0 :     fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );
     458             : 
     459             : 
     460           0 :     err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
     461           0 :     if( FD_UNLIKELY( err ) ) {
     462           0 :       FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
     463           0 :     }
     464           0 :     err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
     465           0 :     if( FD_UNLIKELY( err ) ) {
     466           0 :       FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
     467           0 :     }
     468           0 :     ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
     469           0 :     err = fd_tar_writer_write_file_data( writer, padding, align_sz );
     470           0 :     if( FD_UNLIKELY( err ) ) {
     471           0 :       FD_LOG_ERR(( "Unable to stream out account padding to tar archive" ));
     472           0 :     }
     473           0 :   }
     474             : 
     475           0 :   err = fd_tar_writer_fini_file( writer );
     476           0 :   if( FD_UNLIKELY( err ) ) {
     477           0 :     FD_LOG_ERR(( "Unable to finish writing out file" ));
     478           0 :   }
     479             : 
     480             :   /* TODO: At this point we must implement compaction to the snapshot service.
     481             :      Without this, we are actually not cleaning up any tombstones from funk. */
     482             : 
     483           0 :   if( snapshot_ctx->is_incremental ) {
     484           0 :     fd_funk_start_write( funk );
     485           0 :     err = fd_funk_rec_forget( funk, tombstones, tombstones_cnt );
     486           0 :     if( FD_UNLIKELY( err!=FD_FUNK_SUCCESS ) ) {
     487           0 :       FD_LOG_ERR(( "Unable to forget tombstones" ));
     488           0 :     }
     489           0 :     FD_LOG_NOTICE(( "Compacted %lu tombstone records", tombstones_cnt ));
     490           0 :     fd_funk_end_write( funk );
     491           0 :   }
     492             : 
     493           0 :   fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), snapshot_slot_keys );
     494           0 :   fd_valloc_free( fd_spad_virtual( snapshot_ctx->spad ), tombstones );
     495             : 
     496           0 : }
     497             : 
static void
fd_snapshot_create_serialiable_stakes( fd_snapshot_ctx_t * snapshot_ctx,
                                       fd_stakes_t *       old_stakes,
                                       fd_stakes_t *       new_stakes ) {

  /* Rebuild the runtime's stakes cache (old_stakes) into a new copy
     (new_stakes) that can be reserialized into the Agave-compatible
     snapshot format.

     The deserialized stakes cache that is used by the runtime can't be
     reserialized into the format that Agave uses. For every vote account
     in the stakes struct, the Firedancer client holds a decoded copy of the
     vote state. However, this vote state can't be reserialized back into the
     full vote account data.

     This poses a problem in the Agave client because upon boot, Agave
     verifies that for all of the vote accounts in the stakes struct, the data
     in the cache is the same as the data in the accounts db.

     The other problem is that the Firedancer stakes cache does not evict old
     entries and doesn't update delegations within the cache. The cache will
     just insert new pubkeys as stake accounts are created/delegated to. To
     make the cache conformant for the snapshot, old accounts should be removed
     from the snapshot and all of the delegations should be updated. */

  /* First populate the vote accounts using the vote accounts/stakes cache.
     We can populate over all of the fields except we can't reserialize the
     vote account data. Instead we will copy over the raw contents of all of
     the vote accounts. */

  ulong   vote_accounts_len                    = fd_vote_accounts_pair_t_map_size( old_stakes->vote_accounts.vote_accounts_pool, old_stakes->vote_accounts.vote_accounts_root );
  uchar * pool_mem                             = fd_spad_alloc( snapshot_ctx->spad, fd_vote_accounts_pair_t_map_align(), fd_vote_accounts_pair_t_map_footprint( vote_accounts_len ) );
  new_stakes->vote_accounts.vote_accounts_pool = fd_vote_accounts_pair_t_map_join( fd_vote_accounts_pair_t_map_new( pool_mem, vote_accounts_len ) );
  new_stakes->vote_accounts.vote_accounts_root = NULL;

  /* In-order walk over the old vote accounts map; each entry is copied into
     the freshly created pool with raw account bytes pulled from the
     accounts db via the acc mgr. */

  for( fd_vote_accounts_pair_t_mapnode_t * n = fd_vote_accounts_pair_t_map_minimum(
       old_stakes->vote_accounts.vote_accounts_pool,
       old_stakes->vote_accounts.vote_accounts_root );
       n;
       n = fd_vote_accounts_pair_t_map_successor( old_stakes->vote_accounts.vote_accounts_pool, n ) ) {

    /* The new pool was sized to exactly vote_accounts_len above, so acquire
       will not run out of nodes during this walk. */
    fd_vote_accounts_pair_t_mapnode_t * new_node = fd_vote_accounts_pair_t_map_acquire( new_stakes->vote_accounts.vote_accounts_pool );
    new_node->elem.key   = n->elem.key;
    new_node->elem.stake = n->elem.stake;
    /* Now to populate the value, lookup the account using the acc mgr */
    FD_TXN_ACCOUNT_DECL( vote_acc );
    int err = fd_acc_mgr_view( snapshot_ctx->acc_mgr, NULL, &n->elem.key, vote_acc );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Failed to view vote account from stakes cache %s", FD_BASE58_ENC_32_ALLOCA(&n->elem.key) ));
    }

    /* Deep-copy the raw on-chain account contents into spad-backed memory so
       the serializer emits exactly the bytes Agave will find in accounts db. */
    new_node->elem.value.lamports   = vote_acc->const_meta->info.lamports;
    new_node->elem.value.data_len   = vote_acc->const_meta->dlen;
    new_node->elem.value.data       = fd_spad_alloc( snapshot_ctx->spad, 8UL, vote_acc->const_meta->dlen );
    fd_memcpy( new_node->elem.value.data, vote_acc->const_data, vote_acc->const_meta->dlen );
    fd_memcpy( &new_node->elem.value.owner, &vote_acc->const_meta->info.owner, sizeof(fd_pubkey_t) );
    new_node->elem.value.executable = vote_acc->const_meta->info.executable;
    new_node->elem.value.rent_epoch = vote_acc->const_meta->info.rent_epoch;
    fd_vote_accounts_pair_t_map_insert( new_stakes->vote_accounts.vote_accounts_pool, &new_stakes->vote_accounts.vote_accounts_root, new_node );

  }

  /* Stale stake delegations should also be removed or updated in the cache.
     TODO: This will likely be changed in the near future as the stake
     program is migrated to a bpf program. It will likely be replaced by an
     index of stake/vote accounts.

     Note: this mutates old_stakes in place; the successor pointer nn is
     captured before a node may be removed so iteration stays valid. */

  FD_TXN_ACCOUNT_DECL( stake_acc );
  fd_delegation_pair_t_mapnode_t *      nn = NULL;
  for( fd_delegation_pair_t_mapnode_t * n  = fd_delegation_pair_t_map_minimum(
      old_stakes->stake_delegations_pool, old_stakes->stake_delegations_root ); n; n=nn ) {

    nn = fd_delegation_pair_t_map_successor( old_stakes->stake_delegations_pool, n );

    int err = fd_acc_mgr_view( snapshot_ctx->acc_mgr, NULL, &n->elem.account, stake_acc );
    if( FD_UNLIKELY( err ) ) {
      /* If the stake account doesn't exist, the cache is stale and the entry
         just needs to be evicted. */
      fd_delegation_pair_t_map_remove( old_stakes->stake_delegations_pool, &old_stakes->stake_delegations_root, n );
      fd_delegation_pair_t_map_release( old_stakes->stake_delegations_pool, n );
    } else {
      /* Otherwise, just update the delegation in case it is stale. */
      fd_bincode_decode_ctx_t ctx = {
        .data    = stake_acc->const_data,
        .dataend = stake_acc->const_data + stake_acc->const_meta->dlen,
      };

      ulong total_sz = 0UL;
      err = fd_stake_state_v2_decode_footprint( &ctx, &total_sz );
      if( FD_UNLIKELY( err ) ) {
        FD_LOG_ERR(( "Failed to decode stake state footprint" ));
      }

      uchar * mem = fd_spad_alloc( snapshot_ctx->spad, FD_STAKE_STATE_V2_ALIGN, total_sz );
      if( FD_UNLIKELY( !mem ) ) {
        FD_LOG_ERR(( "Failed to allocate memory for stake state" ));
      }

      fd_stake_state_v2_t * stake_state = fd_stake_state_v2_decode( mem, &ctx );

      /* NOTE(review): this assumes the decoded state is the Stake variant;
         inner.stake is read without checking the discriminant — confirm all
         cached delegations are backed by Stake-variant accounts. */
      n->elem.delegation = stake_state->inner.stake.stake.delegation;
    }
  }

  /* Copy over the rest of the fields as they are the same. The delegation
     pool/root are shared (shallow-copied) with old_stakes, which was updated
     in place above. */

  new_stakes->stake_delegations_pool = old_stakes->stake_delegations_pool;
  new_stakes->stake_delegations_root = old_stakes->stake_delegations_root;
  new_stakes->unused                 = old_stakes->unused;
  new_stakes->epoch                  = old_stakes->epoch;
  new_stakes->stake_history          = old_stakes->stake_history;

}
     607             : 
static inline void
fd_snapshot_create_populate_bank( fd_snapshot_ctx_t *   snapshot_ctx,
                                  fd_versioned_bank_t * bank ) {

  /* Fill in the manifest's versioned bank from the runtime's slot bank and
     epoch bank so that the resulting snapshot can be booted by both the
     Agave and Firedancer clients. */

  fd_slot_bank_t  * slot_bank  = &snapshot_ctx->slot_bank;
  fd_epoch_bank_t * epoch_bank = &snapshot_ctx->epoch_bank;

  /* The blockhash queue has to be copied over along with all of its entries.
     As a note, the size is 300 but in fact is of size 301 due to a known bug
     in the agave client that is emulated by the firedancer client. */

  bank->blockhash_queue.last_hash_index = slot_bank->block_hash_queue.last_hash_index;
  bank->blockhash_queue.last_hash       = fd_spad_alloc( snapshot_ctx->spad, FD_HASH_ALIGN, FD_HASH_FOOTPRINT );
  fd_memcpy( bank->blockhash_queue.last_hash, slot_bank->block_hash_queue.last_hash, sizeof(fd_hash_t) );

  bank->blockhash_queue.ages_len = fd_hash_hash_age_pair_t_map_size( slot_bank->block_hash_queue.ages_pool, slot_bank->block_hash_queue.ages_root);
  bank->blockhash_queue.ages     = fd_spad_alloc( snapshot_ctx->spad, FD_HASH_HASH_AGE_PAIR_ALIGN, bank->blockhash_queue.ages_len * sizeof(fd_hash_hash_age_pair_t) );
  bank->blockhash_queue.max_age  = FD_BLOCKHASH_QUEUE_SIZE;

  /* Flatten the ages map into the contiguous array the manifest expects. */

  fd_block_hash_queue_t             * queue               = &slot_bank->block_hash_queue;
  fd_hash_hash_age_pair_t_mapnode_t * nn                  = NULL;
  ulong                               blockhash_queue_idx = 0UL;
  for( fd_hash_hash_age_pair_t_mapnode_t * n = fd_hash_hash_age_pair_t_map_minimum( queue->ages_pool, queue->ages_root ); n; n = nn ) {
    nn = fd_hash_hash_age_pair_t_map_successor( queue->ages_pool, n );
    fd_memcpy( &bank->blockhash_queue.ages[ blockhash_queue_idx++ ], &n->elem, sizeof(fd_hash_hash_age_pair_t) );
  }



  /* Ancestor can be omitted to boot off of for both clients */

  bank->ancestors_len                         = 0UL;
  bank->ancestors                             = NULL;

  bank->hash                                  = slot_bank->banks_hash;
  bank->parent_hash                           = slot_bank->prev_banks_hash;
  bank->parent_slot                           = slot_bank->prev_slot;
  bank->hard_forks                            = slot_bank->hard_forks;
  bank->transaction_count                     = slot_bank->transaction_count;
  bank->signature_count                       = slot_bank->parent_signature_cnt;
  bank->capitalization                        = slot_bank->capitalization;
  bank->tick_height                           = slot_bank->tick_height;
  bank->max_tick_height                       = slot_bank->max_tick_height;

  /* The hashes_per_tick needs to be copied over from the epoch bank because
     the pointer could go out of bounds during an epoch boundary. */
  bank->hashes_per_tick                       = fd_spad_alloc( snapshot_ctx->spad, alignof(ulong), sizeof(ulong) );
  fd_memcpy( bank->hashes_per_tick, &epoch_bank->hashes_per_tick, sizeof(ulong) );

  bank->ticks_per_slot                        = FD_TICKS_PER_SLOT;
  bank->ns_per_slot                           = epoch_bank->ns_per_slot;
  bank->genesis_creation_time                 = epoch_bank->genesis_creation_time;
  bank->slots_per_year                        = epoch_bank->slots_per_year;

  /* This value can be set to 0 because the Agave client recomputes this value
     and the firedancer client doesn't use it. */

  bank->accounts_data_len                     = 0UL;

  bank->slot                                  = snapshot_ctx->slot;
  bank->epoch                                 = fd_slot_to_epoch( &epoch_bank->epoch_schedule, bank->slot, NULL );
  bank->block_height                          = slot_bank->block_height;

  /* Collector id can be left as null for both clients */

  fd_memset( &bank->collector_id, 0, sizeof(fd_pubkey_t) );

  bank->collector_fees                        = slot_bank->collected_execution_fees + slot_bank->collected_priority_fees;
  bank->fee_calculator.lamports_per_signature = slot_bank->lamports_per_signature;
  bank->fee_rate_governor                     = slot_bank->fee_rate_governor;
  bank->collected_rent                        = slot_bank->collected_rent;

  bank->rent_collector.epoch                  = bank->epoch;
  bank->rent_collector.epoch_schedule         = epoch_bank->rent_epoch_schedule;
  bank->rent_collector.slots_per_year         = epoch_bank->slots_per_year;
  bank->rent_collector.rent                   = epoch_bank->rent;

  bank->epoch_schedule                        = epoch_bank->epoch_schedule;
  bank->inflation                             = epoch_bank->inflation;

  /* Unused accounts can be left as NULL for both clients. */

  fd_memset( &bank->unused_accounts, 0, sizeof(fd_unused_accounts_t) );

  /* We need to copy over the stakes for two epochs despite the Agave client
     providing the stakes for 6 epochs. These stakes need to be copied over
     because of the fact that the leader schedule computation uses the two
     previous epoch stakes.

     TODO: This field has been deprecated by agave and has instead been
     replaced with the versioned epoch stakes field in the manifest. The
     firedancer client will populate the deprecated field. */

  fd_epoch_epoch_stakes_pair_t * relevant_epoch_stakes = fd_spad_alloc( snapshot_ctx->spad, FD_EPOCH_EPOCH_STAKES_PAIR_ALIGN, 2UL * sizeof(fd_epoch_epoch_stakes_pair_t) );
  fd_memset( &relevant_epoch_stakes[0], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
  fd_memset( &relevant_epoch_stakes[1], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
  relevant_epoch_stakes[0].key                        = bank->epoch;
  relevant_epoch_stakes[0].value.stakes.vote_accounts = slot_bank->epoch_stakes;
  relevant_epoch_stakes[1].key                        = bank->epoch+1UL;
  relevant_epoch_stakes[1].value.stakes.vote_accounts = epoch_bank->next_epoch_stakes;

  bank->epoch_stakes_len = 2UL;
  bank->epoch_stakes     = relevant_epoch_stakes;
  bank->is_delta         = snapshot_ctx->is_incremental;

  /* The firedancer runtime currently maintains a version of the stakes which
     can't be reserialized into a format that is compatible with the Solana
     snapshot format. Therefore, we must recompute the data structure using
     the pubkeys from the stakes cache that is currently in the epoch context. */

  fd_snapshot_create_serialiable_stakes( snapshot_ctx, &epoch_bank->stakes, &bank->stakes );

}
     721             : 
static inline void
fd_snapshot_create_setup_and_validate_ctx( fd_snapshot_ctx_t * snapshot_ctx ) {

  /* Prepare the snapshot context for snapshot creation: set up the account
     manager, load the epoch bank and slot bank out of funk, validate the
     requested snapshot parameters, and reset the two scratch files. Any
     failure aborts via FD_LOG_ERR. */

  fd_funk_t * funk = snapshot_ctx->funk;

  /* Initialize the account manager. */

  uchar * mem = fd_spad_alloc( snapshot_ctx->spad, FD_ACC_MGR_ALIGN, FD_ACC_MGR_FOOTPRINT );
  snapshot_ctx->acc_mgr = fd_acc_mgr_new( mem, funk );
  if( FD_UNLIKELY( !snapshot_ctx->acc_mgr ) ) {
    FD_LOG_ERR(( "Failed to initialize account manager" ));
  }

  /* First the epoch bank. The funk record is a uint magic followed by the
     bincode-encoded bank. */

  fd_funk_rec_key_t     epoch_id  = fd_runtime_epoch_bank_key();
  fd_funk_rec_t const * epoch_rec = fd_funk_rec_query( funk, NULL, &epoch_id );
  if( FD_UNLIKELY( !epoch_rec ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: missing record" ));
  }
  void * epoch_val = fd_funk_val( epoch_rec, fd_funk_wksp( funk ) );

  /* The record must be large enough to at least hold the magic. */
  if( FD_UNLIKELY( fd_funk_val_sz( epoch_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: empty record" ));
  }

  uint epoch_magic = *(uint*)epoch_val;

  fd_bincode_decode_ctx_t epoch_decode_ctx = {
    .data    = (uchar*)epoch_val + sizeof(uint),
    .dataend = (uchar*)epoch_val + fd_funk_val_sz( epoch_rec ),
  };

  if( FD_UNLIKELY( epoch_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Epoch bank record has wrong magic" ));
  }

  ulong total_sz = 0UL;
  int   err      = fd_epoch_bank_decode_footprint( &epoch_decode_ctx, &total_sz );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode epoch bank" ));
  }

  uchar * epoch_bank_mem = fd_spad_alloc( snapshot_ctx->spad, FD_EPOCH_BANK_ALIGN, total_sz );
  if( FD_UNLIKELY( !epoch_bank_mem ) ) {
    FD_LOG_ERR(( "Failed to allocate memory for epoch bank" ));
  }

  fd_epoch_bank_decode( epoch_bank_mem, &epoch_decode_ctx );

  /* NOTE(review): only sizeof(fd_epoch_bank_t) is copied into the ctx; any
     decoded out-of-struct data remains in spad memory — confirm the ctx's
     lifetime does not outlive the spad frame. */
  fd_memcpy( &snapshot_ctx->epoch_bank, epoch_bank_mem, sizeof(fd_epoch_bank_t) );

  /* Now the slot bank, same magic + bincode layout as the epoch bank. */

  fd_funk_rec_key_t     slot_id  = fd_runtime_slot_bank_key();
  fd_funk_rec_t const * slot_rec = fd_funk_rec_query( funk, NULL, &slot_id );
  if( FD_UNLIKELY( !slot_rec ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: missing record" ));
  }
  void * slot_val = fd_funk_val( slot_rec, fd_funk_wksp( funk ) );

  if( FD_UNLIKELY( fd_funk_val_sz( slot_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: empty record" ));
  }

  uint slot_magic = *(uint*)slot_val;

  fd_bincode_decode_ctx_t slot_decode_ctx = {
    .data    = (uchar*)slot_val + sizeof(uint),
    .dataend = (uchar*)slot_val + fd_funk_val_sz( slot_rec ),
  };

  if( FD_UNLIKELY( slot_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Slot bank record has wrong magic" ));
  }

  total_sz = 0UL;
  err      = fd_slot_bank_decode_footprint( &slot_decode_ctx, &total_sz );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode slot bank" ));
  }

  uchar * slot_bank_mem = fd_spad_alloc( snapshot_ctx->spad, FD_SLOT_BANK_ALIGN, total_sz );
  if( FD_UNLIKELY( !slot_bank_mem ) ) {
    FD_LOG_ERR(( "Failed to allocate memory for slot bank" ));
  }

  fd_slot_bank_decode( slot_bank_mem, &slot_decode_ctx );

  memcpy( &snapshot_ctx->slot_bank, slot_bank_mem, sizeof(fd_slot_bank_t) );

  /* Validate that the snapshot context is setup correctly */

  if( FD_UNLIKELY( !snapshot_ctx->out_dir ) ) {
    FD_LOG_ERR(( "Snapshot directory is not set" ));
  }

  /* Can't snapshot a slot the runtime hasn't reached yet. */
  if( FD_UNLIKELY( snapshot_ctx->slot>snapshot_ctx->slot_bank.slot ) ) {
    FD_LOG_ERR(( "Snapshot slot=%lu is greater than the current slot=%lu",
                     snapshot_ctx->slot, snapshot_ctx->slot_bank.slot ));
  }

  /* Truncate the two files used for snapshot creation and seek to its start.
     lseek returns the resulting offset; since the target offset is 0, any
     nonzero return (including -1 on error) indicates failure. */

  long seek = lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->tmp_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the temporary file" ));
  }

  seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->snapshot_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the snapshot file" ));
  }

}
     845             : 
     846             : static inline void
     847           0 : fd_snapshot_create_setup_writer( fd_snapshot_ctx_t * snapshot_ctx ) {
     848             : 
     849             :   /* Setup a tar writer. */
     850             : 
     851           0 :   uchar * writer_mem   = fd_spad_alloc( snapshot_ctx->spad, fd_tar_writer_align(), fd_tar_writer_footprint() );
     852           0 :   snapshot_ctx->writer = fd_tar_writer_new( writer_mem, snapshot_ctx->tmp_fd );
     853           0 :   if( FD_UNLIKELY( !snapshot_ctx->writer ) ) {
     854           0 :     FD_LOG_ERR(( "Unable to create a tar writer" ));
     855           0 :   }
     856           0 : }
     857             : 
     858             : static inline void
     859           0 : fd_snapshot_create_write_version( fd_snapshot_ctx_t * snapshot_ctx ) {
     860             : 
     861             :   /* The first file in the tar archive should be the version file.. */
     862             : 
     863           0 :   int err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_VERSION_FILE );
     864           0 :   if( FD_UNLIKELY( err ) ) {
     865           0 :     FD_LOG_ERR(( "Failed to create the version file" ));
     866           0 :   }
     867             : 
     868           0 :   err = fd_tar_writer_write_file_data( snapshot_ctx->writer, FD_SNAPSHOT_VERSION, FD_SNAPSHOT_VERSION_LEN);
     869           0 :   if( FD_UNLIKELY( err ) ) {
     870           0 :     FD_LOG_ERR(( "Failed to create the version file" ));
     871           0 :   }
     872             : 
     873           0 :   err = fd_tar_writer_fini_file( snapshot_ctx->writer );
     874           0 :   if( FD_UNLIKELY( err ) ) {
     875           0 :     FD_LOG_ERR(( "Failed to create the version file" ));
     876           0 :   }
     877             : 
     878           0 : }
     879             : 
     880             : static inline void
     881           0 : fd_snapshot_create_write_status_cache( fd_snapshot_ctx_t * snapshot_ctx ) {
     882             : 
     883             :   /* First convert the existing status cache into a snapshot-friendly format. */
     884             : 
     885           0 :   fd_bank_slot_deltas_t slot_deltas_new = {0};
     886           0 :   int err = fd_txncache_get_entries( snapshot_ctx->status_cache,
     887           0 :                                      &slot_deltas_new,
     888           0 :                                      snapshot_ctx->spad );
     889           0 :   if( FD_UNLIKELY( err ) ) {
     890           0 :     FD_LOG_ERR(( "Failed to get entries from the status cache" ));
     891           0 :   }
     892           0 :   ulong   bank_slot_deltas_sz = fd_bank_slot_deltas_size( &slot_deltas_new );
     893           0 :   uchar * out_status_cache    = fd_spad_alloc( snapshot_ctx->spad,
     894           0 :                                                FD_BANK_SLOT_DELTAS_ALIGN,
     895           0 :                                                bank_slot_deltas_sz );
     896           0 :   fd_bincode_encode_ctx_t encode_status_cache = {
     897           0 :     .data    = out_status_cache,
     898           0 :     .dataend = out_status_cache + bank_slot_deltas_sz,
     899           0 :   };
     900           0 :   if( FD_UNLIKELY( fd_bank_slot_deltas_encode( &slot_deltas_new, &encode_status_cache ) ) ) {
     901           0 :     FD_LOG_ERR(( "Failed to encode the status cache" ));
     902           0 :   }
     903             : 
     904             :   /* Now write out the encoded buffer to the tar archive. */
     905             : 
     906           0 :   err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_STATUS_CACHE_FILE );
     907           0 :   if( FD_UNLIKELY( err ) ) {
     908           0 :     FD_LOG_ERR(( "Failed to create the status cache file" ));
     909           0 :   }
     910           0 :   err = fd_tar_writer_write_file_data( snapshot_ctx->writer, out_status_cache, bank_slot_deltas_sz );
     911           0 :   if( FD_UNLIKELY( err ) ) {
     912           0 :     FD_LOG_ERR(( "Failed to create the status cache file" ));
     913           0 :   }
     914           0 :   err = fd_tar_writer_fini_file( snapshot_ctx->writer );
     915           0 :   if( FD_UNLIKELY( err ) ) {
     916           0 :     FD_LOG_ERR(( "Failed to create the status cache file" ));
     917           0 :   }
     918             : 
     919             :   /* Registers all roots and unconstipates the status cache. */
     920             : 
     921           0 :   fd_txncache_flush_constipated_slots( snapshot_ctx->status_cache );
     922             : 
     923           0 : }
     924             : 
/* Builds the snapshot manifest from the current bank/slot state, writes out
   all of the append vec (account) files, encodes the manifest, and back-fills
   it into the space previously reserved in the tar archive before deleting
   the tar writer.

   snapshot_ctx        - fully set up snapshot context (writer, spad, banks).
   out_hash            - full-snapshot path only: receives the accounts hash
                         computed while populating the append vec index.
   out_capitalization  - full-snapshot path only: receives the slot bank's
                         capitalization.

   All failures are fatal (FD_LOG_ERR). */
static inline void
fd_snapshot_create_write_manifest_and_acc_vecs( fd_snapshot_ctx_t * snapshot_ctx,
                                                fd_hash_t *         out_hash,
                                                ulong *             out_capitalization ) {


  fd_solana_manifest_t manifest = {0};

  /* Copy in all the fields of the bank. */

  fd_snapshot_create_populate_bank( snapshot_ctx, &manifest.bank );

  /* Populate the rest of the manifest, except for the append vec index. */

  manifest.lamports_per_signature                = snapshot_ctx->slot_bank.lamports_per_signature;
  manifest.epoch_account_hash                    = &snapshot_ctx->slot_bank.epoch_account_hash;

  /* FIXME: The versioned epoch stakes needs to be implemented. Right now if
     we try to create a snapshot on or near an epoch boundary, we will produce
     an invalid snapshot. */

  manifest.versioned_epoch_stakes_len            = 0UL;
  manifest.versioned_epoch_stakes                = NULL;

  /* Populate the append vec index and write out the corresponding acc files.
     This also accumulates the capitalization of accounts covered by an
     incremental snapshot into incr_capitalization. */

  ulong incr_capitalization = 0UL;
  fd_snapshot_create_populate_acc_vecs( snapshot_ctx, &manifest, snapshot_ctx->writer, &incr_capitalization );

  /* Once the append vec index is populated and the hashes are calculated,
     propogate the hashes to the correct fields. As a note, the last_snap_hash
     is the full snapshot's account hash. */

  if( snapshot_ctx->is_incremental ) {
    /* NOTE(review): manifest was zero-initialized above, so this dereference
       assumes fd_snapshot_create_populate_acc_vecs allocates
       bank_incremental_snapshot_persistence on the incremental path --
       confirm; otherwise this is a NULL dereference. */
    manifest.bank_incremental_snapshot_persistence->full_slot                  = snapshot_ctx->last_snap_slot;
    fd_memcpy( &manifest.bank_incremental_snapshot_persistence->full_hash, snapshot_ctx->last_snap_acc_hash, sizeof(fd_hash_t) );
    manifest.bank_incremental_snapshot_persistence->full_capitalization        = snapshot_ctx->last_snap_capitalization;
    manifest.bank_incremental_snapshot_persistence->incremental_hash           = snapshot_ctx->acc_hash;
    manifest.bank_incremental_snapshot_persistence->incremental_capitalization = incr_capitalization;
  } else {
    /* Full snapshot: report the accounts hash and capitalization back to the
       caller (used later when producing an incremental on top of this one). */
    memcpy( out_hash, &manifest.accounts_db.bank_hash_info.accounts_hash, sizeof(fd_hash_t) );
    *out_capitalization = snapshot_ctx->slot_bank.capitalization;
  }

  /* At this point, all of the account files are written out and the append
     vec index is populated in the manifest. We have already reserved space
     in the archive for the manifest. All we need to do now is encode the
     manifest and write it in. */

  ulong   manifest_sz  = fd_solana_manifest_size( &manifest );
  uchar * out_manifest = fd_spad_alloc( snapshot_ctx->spad, fd_solana_manifest_align(), manifest_sz );

  fd_bincode_encode_ctx_t encode = {
    .data    = out_manifest,
    .dataend = out_manifest + manifest_sz
  };

  int err = fd_solana_manifest_encode( &manifest, &encode );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Failed to encode the manifest" ));
  }

  /* Back-fill the encoded manifest into the reserved region of the tar. */

  err = fd_tar_writer_fill_space( snapshot_ctx->writer, out_manifest, manifest_sz );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Failed to write out the manifest" ));
  }

  void * mem = fd_tar_writer_delete( snapshot_ctx->writer );
  if( FD_UNLIKELY( !mem ) ) {
    FD_LOG_ERR(( "Unable to delete the tar writer" ));
  }

}
     998             : 
     999             : static inline void
    1000           0 : fd_snapshot_create_compress( fd_snapshot_ctx_t * snapshot_ctx ) {
    1001             : 
    1002             :   /* Compress the file using zstd. First open the non-compressed file and
    1003             :      create a file for the compressed file. The reason why we can't do this
    1004             :      as we stream out the snapshot archive is that we write back into the
    1005             :      manifest buffer.
    1006             : 
    1007             :      TODO: A way to eliminate this and to just stream out
    1008             :      1 compressed file would be to totally precompute the index such that
    1009             :      we don't have to write back into funk.
    1010             : 
    1011             :      TODO: Currently, the snapshot service interfaces directly with the zstd
    1012             :      library but a generalized cstream defined in fd_zstd should be used
    1013             :      instead. */
    1014             : 
    1015           0 :   ulong in_buf_sz   = ZSTD_CStreamInSize();
    1016           0 :   ulong zstd_buf_sz = ZSTD_CStreamOutSize();
    1017           0 :   ulong out_buf_sz  = ZSTD_CStreamOutSize();
    1018             : 
    1019           0 :   char * in_buf   = fd_spad_alloc( snapshot_ctx->spad, FD_ZSTD_CSTREAM_ALIGN, in_buf_sz );
    1020           0 :   char * zstd_buf = fd_spad_alloc( snapshot_ctx->spad, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
    1021           0 :   char * out_buf  = fd_spad_alloc( snapshot_ctx->spad, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
    1022             : 
    1023             :   /* Reopen the tarball and open/overwrite the filename for the compressed,
    1024             :      finalized full snapshot. Setup the zstd compression stream. */
    1025             : 
    1026           0 :   int err = 0;
    1027             : 
    1028           0 :   ZSTD_CStream * cstream = ZSTD_createCStream();
    1029           0 :   if( FD_UNLIKELY( !cstream ) ) {
    1030           0 :     FD_LOG_ERR(( "Failed to create the zstd compression stream" ));
    1031           0 :   }
    1032           0 :   ZSTD_initCStream( cstream, ZSTD_CLEVEL_DEFAULT );
    1033             : 
    1034           0 :   fd_io_buffered_ostream_t ostream[1];
    1035             : 
    1036           0 :   if( FD_UNLIKELY( !fd_io_buffered_ostream_init( ostream, snapshot_ctx->snapshot_fd, out_buf, out_buf_sz ) ) ) {
    1037           0 :     FD_LOG_ERR(( "Failed to initialize the ostream" ));
    1038           0 :   }
    1039             : 
    1040           0 :   long seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
    1041           0 :   if( FD_UNLIKELY( seek!=0L ) ) {
    1042           0 :     FD_LOG_ERR(( "Failed to seek to the start of the file" ));
    1043           0 :   }
    1044             : 
    1045             :   /* At this point, the tar archive and the new zstd file is open. The zstd
    1046             :      streamer is still open. Now, we are ready to read in bytes and stream
    1047             :      compress them. We will keep going until we see an EOF in a tar archive. */
    1048             : 
    1049           0 :   ulong in_sz = in_buf_sz;
    1050             : 
    1051           0 :   ulong off = (ulong)lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
    1052           0 :   if( FD_UNLIKELY( off ) ) {
    1053           0 :     FD_LOG_ERR(( "Failed to seek to the beginning of the file" ));
    1054           0 :   }
    1055             : 
    1056           0 :   while( in_sz==in_buf_sz ) {
    1057             : 
    1058             :     /* Read chunks from the file. There isn't really a need to use a streamed
    1059             :        reader here because we will read in the max size buffer for every single
    1060             :        file read except for the very last one.
    1061             : 
    1062             :        in_sz will only not equal in_buf_sz on the last read. */
    1063           0 :     err = fd_io_read( snapshot_ctx->tmp_fd, in_buf, 0UL, in_buf_sz, &in_sz );
    1064           0 :     if( FD_UNLIKELY( err ) ) {
    1065           0 :       FD_LOG_ERR(( "Failed to read in the file" ));
    1066           0 :     }
    1067             : 
    1068             :     /* Compress the in memory buffer and add it to the output stream. */
    1069             : 
    1070           0 :     ZSTD_inBuffer input = { in_buf, in_sz, 0UL };
    1071           0 :     while( input.pos<input.size ) {
    1072           0 :       ZSTD_outBuffer output = { zstd_buf, zstd_buf_sz, 0UL };
    1073           0 :       ulong          ret    = ZSTD_compressStream( cstream, &output, &input );
    1074             : 
    1075           0 :       if( FD_UNLIKELY( ZSTD_isError( ret ) ) ) {
    1076           0 :         FD_LOG_ERR(( "Compression error: %s\n", ZSTD_getErrorName( ret ) ));
    1077           0 :       }
    1078             : 
    1079           0 :       err = fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
    1080           0 :       if( FD_UNLIKELY( err ) ) {
    1081           0 :         FD_LOG_ERR(( "Failed to write out the compressed file" ));
    1082           0 :       }
    1083           0 :     }
    1084           0 :   }
    1085             : 
    1086             :   /* Now flush any bytes left in the zstd buffer, cleanup open file
    1087             :      descriptors, and deinit any data structures.  */
    1088             : 
    1089           0 :   ZSTD_outBuffer output    = { zstd_buf, zstd_buf_sz, 0UL };
    1090           0 :   ulong          remaining = ZSTD_endStream(  cstream, &output );
    1091             : 
    1092           0 :   if( FD_UNLIKELY( ZSTD_isError( remaining ) ) ) {
    1093           0 :     FD_LOG_ERR(( "Unable to end the zstd stream" ));
    1094           0 :   }
    1095           0 :   if( output.pos>0UL ) {
    1096           0 :     fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
    1097           0 :   }
    1098             : 
    1099           0 :   ZSTD_freeCStream( cstream ); /* Works even if cstream is null */
    1100           0 :   err = fd_io_buffered_ostream_flush( ostream );
    1101           0 :   if( FD_UNLIKELY( err ) ) {
    1102           0 :     FD_LOG_ERR(( "Failed to flush the ostream" ));
    1103           0 :   }
    1104             : 
    1105             :   /* Assuming that there was a successful write, make the compressed
    1106             :      snapshot file readable and servable. */
    1107             : 
    1108           0 :   char tmp_directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
    1109           0 :   err = snprintf( tmp_directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/%s", snapshot_ctx->out_dir, snapshot_ctx->is_incremental ? FD_SNAPSHOT_TMP_INCR_ARCHIVE_ZSTD : FD_SNAPSHOT_TMP_FULL_ARCHIVE_ZSTD );
    1110           0 :   if( FD_UNLIKELY( err<0 ) ) {
    1111           0 :     FD_LOG_ERR(( "Failed to format directory string" ));
    1112           0 :   }
    1113             : 
    1114           0 :   char directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
    1115           0 :   if( !snapshot_ctx->is_incremental ) {
    1116           0 :     err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/snapshot-%lu-%s.tar.zst",
    1117           0 :                     snapshot_ctx->out_dir, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
    1118           0 :   } else {
    1119           0 :     err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/incremental-snapshot-%lu-%lu-%s.tar.zst",
    1120           0 :                     snapshot_ctx->out_dir, snapshot_ctx->last_snap_slot, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
    1121           0 :   }
    1122             : 
    1123           0 :   if( FD_UNLIKELY( err<0 ) ) {
    1124           0 :     FD_LOG_ERR(( "Failed to format directory string" ));
    1125           0 :   }
    1126             : 
    1127           0 :   err = rename( tmp_directory_buf_zstd, directory_buf_zstd );
    1128           0 :   if( FD_UNLIKELY( err<0 ) ) {
    1129           0 :     FD_LOG_ERR(( "Failed to rename file from %s to %s (%i-%s)", tmp_directory_buf_zstd, directory_buf_zstd, errno, fd_io_strerror( errno ) ));
    1130           0 :   }
    1131             : 
    1132           0 : }
    1133             : 
/* Top-level driver: produces a complete (full or incremental, per
   snapshot_ctx->is_incremental) snapshot for snapshot_ctx->slot into
   snapshot_ctx->out_dir.  The steps below are side-effecting and must run
   in exactly this order: each stage appends to / finalizes state set up by
   the previous one (tar writer, version file, status cache, manifest and
   account files, then compression into the final archive name).

   out_hash / out_capitalization are written on the full-snapshot path with
   the accounts hash and capitalization of this snapshot (consumed when a
   later incremental snapshot is built on top of it).

   All failures inside the stages are fatal (FD_LOG_ERR). */
void
fd_snapshot_create_new_snapshot( fd_snapshot_ctx_t * snapshot_ctx,
                                 fd_hash_t *         out_hash,
                                 ulong *             out_capitalization ) {

  FD_LOG_NOTICE(( "Starting to produce a snapshot for slot=%lu in directory=%s", snapshot_ctx->slot, snapshot_ctx->out_dir ));

  /* Validate that the snapshot_ctx is setup correctly. */

  fd_snapshot_create_setup_and_validate_ctx( snapshot_ctx );

  /* Setup the tar archive writer. */

  fd_snapshot_create_setup_writer( snapshot_ctx );

  /* Write out the version file. */

  fd_snapshot_create_write_version( snapshot_ctx );

  /* Dump the status cache and append it to the tar archive. */

  fd_snapshot_create_write_status_cache( snapshot_ctx );

  /* Populate and write out the manifest and append vecs. */

  fd_snapshot_create_write_manifest_and_acc_vecs( snapshot_ctx, out_hash, out_capitalization );

  /* Compress the tar file and write it out to the specified directory. */

  fd_snapshot_create_compress( snapshot_ctx );

  FD_LOG_NOTICE(( "Finished producing a snapshot" ));

}

Generated by: LCOV version 1.14