LCOV - code coverage report
Current view: top level - flamenco/snapshot - fd_snapshot_create.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 668 0.0 %
Date: 2025-01-08 12:08:44 Functions: 0 11 0.0 %

          Line data    Source code
       1             : #include "fd_snapshot_create.h"
       2             : #include "../runtime/sysvar/fd_sysvar_epoch_schedule.h"
       3             : #include "../../ballet/zstd/fd_zstd.h"
       4             : #include "../runtime/fd_hashes.h"
       5             : #include "../runtime/fd_runtime.h"
       6             : 
       7             : #include <errno.h>
       8             : #include <stdio.h>
       9             : #include <stdlib.h>
      10             : #include <sys/stat.h>
      11             : #include <sys/types.h>
      12             : #include <unistd.h>
      13             : #include <zstd.h>
      14             : 
/* Zero-filled scratch buffer used to pad account data out to
   FD_SNAPSHOT_ACC_ALIGN when streaming append vec entries into the tar
   archive. */
static uchar             padding[ FD_SNAPSHOT_ACC_ALIGN ] = {0};
/* Shared template metadata handed out for tombstone (erased) records, which
   have no backing fd_account_meta_t of their own.  Only the .slot field is
   rewritten per call (see fd_snapshot_create_get_default_meta), so all
   callers observe the same underlying object — not safe for concurrent use. */
static fd_account_meta_t default_meta = { .magic = FD_ACCOUNT_META_MAGIC };
      17             : 
      18             : static inline fd_account_meta_t *
      19           0 : fd_snapshot_create_get_default_meta( ulong slot ) {
      20           0 :   default_meta.slot = slot;
      21           0 :   return &default_meta;
      22           0 : }
      23             : 
/* fd_snapshot_create_populate_acc_vecs builds the accounts-db section of a
   Solana snapshot: it sizes and populates the append vec index in `manifest`,
   computes the accounts/snapshot hashes, reserves space in the tar archive
   for the (later back-patched) manifest, and then streams every relevant
   account record out of funk into append vec files in the archive.

   Params:
     snapshot_ctx  snapshot context: funk handle (via acc_mgr), valloc,
                   slot/last_snap_slot, is_incremental flag, and the hash
                   output fields (acc_hash, snap_hash) written here.
     manifest      manifest whose accounts_db fields are populated in place.
     writer        tar writer; files are written serially, so ordering of the
                   fd_tar_writer_* calls below is load-bearing.
     out_cap       accumulator for the lamport capitalization of every account
                   included in an incremental snapshot.  Only touched when
                   is_incremental.  NOTE(review): never initialized in this
                   function — assumes the caller zeroed it; TODO confirm.

   All failures are fatal via FD_LOG_ERR; there is no error return. */
static inline void
fd_snapshot_create_populate_acc_vecs( fd_snapshot_ctx_t                 * snapshot_ctx,
                                      fd_solana_manifest_serializable_t * manifest,
                                      fd_tar_writer_t                   * writer,
                                      ulong                             * out_cap ) {

  /* The append vecs need to be described in an index in the manifest so a
     reader knows what account files to look for. These files are technically
     slot indexed, but the Firedancer implementation of the Solana snapshot
     produces far fewer indices. These storages are for the accounts
     that were modified and deleted in the most recent slot because that
     information is used by the Agave client to calculate and verify the
     bank hash for the given slot. This is done as an optimization to avoid
     having to slot index the Firedancer accounts db which would incur a large
     performance hit.

     To avoid iterating through the root twice to determine what accounts were
     touched in the snapshot slot and what accounts were touched in the
     other slots, we will create an array of pubkey pointers for all accounts
     that were touched in the snapshot slot. This buffer can be safely sized to
     the maximum amount of writable accounts that are possible in a non-epoch
     boundary slot. The rationale for this bound is explained in fd_runtime.h.
     We will not attempt to create a snapshot on an epoch boundary.

     TODO: We must add compaction here. */

  fd_pubkey_t * * snapshot_slot_keys    = fd_valloc_malloc( snapshot_ctx->valloc, alignof(fd_pubkey_t*), sizeof(fd_pubkey_t*) * FD_WRITABLE_ACCS_IN_SLOT );
  ulong           snapshot_slot_key_cnt = 0UL;

  /* We will dynamically resize the number of incremental keys because the upper
     bound will be roughly 8 bytes * writable accs in a slot * number of slots
     since the last full snapshot which can quickly grow to be several gigabytes
     or more. In the normal case, this won't require dynamic resizing. */
  #define FD_INCREMENTAL_KEY_INIT_BOUND (100000UL)
  ulong                       incremental_key_bound = FD_INCREMENTAL_KEY_INIT_BOUND;
  ulong                       incremental_key_cnt   = 0UL;
  /* Only allocated for incremental snapshots; stays NULL otherwise. */
  fd_funk_rec_key_t const * * incremental_keys      = snapshot_ctx->is_incremental ?
                                                      fd_valloc_malloc( snapshot_ctx->valloc, alignof(fd_funk_rec_key_t*), sizeof(fd_funk_rec_key_t*) * incremental_key_bound ) :
                                                      NULL;

  #undef FD_INCREMENTAL_KEY_INIT_BOUND

  /* In order to size out the accounts DB index in the manifest, we must
     iterate through funk and accumulate the size of all of the records
     from all slots before the snapshot_slot. */

  fd_funk_t * funk           = snapshot_ctx->acc_mgr->funk;
  ulong       prev_sz        = 0UL;   /* total serialized bytes of non-snapshot-slot accounts */
  ulong       tombstones_cnt = 0UL;   /* first pass: counts EVERY account record (upper bound
                                         used to size the tombstone array before pass two) */
  for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {

    /* Skip non-account records in funk. */
    if( !fd_funk_key_is_acc( rec->pair.key ) ) {
      continue;
    }

    tombstones_cnt++;

    /* Erased records carry no metadata of their own; substitute the shared
       default meta stamped with the record's erase slot. */
    int                 is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
    uchar const *       raw          = fd_funk_val( rec, fd_funk_wksp( funk ) );
    fd_account_meta_t * metadata     = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
                                                      (fd_account_meta_t*)raw;

    if( !metadata ) {
      continue;
    }

    if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
      continue;
    }

    if( snapshot_ctx->is_incremental ) {
      /* We only care about accounts that were modified since the last
         snapshot slot for incremental snapshots.

         We also need to keep track of the capitalization for all of the
         accounts that are in the incremental as this is verified. */
      if( metadata->slot<=snapshot_ctx->last_snap_slot ) {
        continue;
      }
      incremental_keys[ incremental_key_cnt++ ] = rec->pair.key;
      *out_cap += metadata->info.lamports;

      if( FD_UNLIKELY( incremental_key_cnt==incremental_key_bound ) ) {
        /* Dynamically resize if needed: doubling grow, copy, free old. */
        incremental_key_bound *= 2UL;
        fd_funk_rec_key_t const * * new_incremental_keys = fd_valloc_malloc( snapshot_ctx->valloc,
                                                                             alignof(fd_funk_rec_key_t*),
                                                                             sizeof(fd_funk_rec_key_t*) * incremental_key_bound );
        fd_memcpy( new_incremental_keys, incremental_keys, sizeof(fd_funk_rec_key_t*) * incremental_key_cnt );
        fd_valloc_free( snapshot_ctx->valloc, incremental_keys );
        incremental_keys = new_incremental_keys;
      }
    }

    /* We know that all of the accounts from the snapshot slot can fit into
       one append vec, so we ignore all accounts from the snapshot slot. */

    if( metadata->slot==snapshot_ctx->slot ) {
      continue;
    }

    prev_sz += metadata->dlen + sizeof(fd_solana_account_hdr_t);

  }

  /* At this point we have sized out all of the relevant accounts that will
     be included in the snapshot. Now we must populate each of the append vecs
     and update the index as we go.

     When we account for the number of slots we need to consider one append vec
     for the snapshot slot and try to maximally fill up the others: an append
     vec has a protocol-defined maximum size in Agave.  */

  /* 1 storage for the snapshot slot + ceil(prev_sz / APPEND_VEC_SZ_MAX) for
     the rest.  NOTE(review): when prev_sz is an exact multiple the divide
     already accounts for all bytes and the remainder test adds nothing, but
     when it is not, both terms contribute — this is the usual ceil idiom
     split across two lines. */
  ulong num_slots = 1UL + prev_sz / FD_SNAPSHOT_APPEND_VEC_SZ_MAX +
                    (prev_sz % FD_SNAPSHOT_APPEND_VEC_SZ_MAX ? 1UL : 0UL);

  fd_solana_accounts_db_fields_t * accounts_db = &manifest->accounts_db;

  accounts_db->storages_len                    = num_slots;
  accounts_db->storages                        = fd_valloc_malloc( snapshot_ctx->valloc,
                                                                   FD_SNAPSHOT_SLOT_ACC_VECS_ALIGN,
                                                                   sizeof(fd_snapshot_slot_acc_vecs_t) * accounts_db->storages_len );
  accounts_db->version                        = 1UL;
  accounts_db->slot                           = snapshot_ctx->slot;
  accounts_db->historical_roots_len           = 0UL;
  accounts_db->historical_roots               = NULL;
  accounts_db->historical_roots_with_hash_len = 0UL;
  accounts_db->historical_roots_with_hash     = NULL;

  for( ulong i=0UL; i<num_slots; i++ ) {
    /* Populate the storages for each slot. As a note, the slot number only
       matters for the snapshot slot. The other slot numbers don't affect
       consensus at all. Agave also maintains an invariant that there can
       only be one account vec per storage. */

    accounts_db->storages[ i ].account_vecs_len          = 1UL;
    accounts_db->storages[ i ].account_vecs              = fd_valloc_malloc( snapshot_ctx->valloc,
                                                                             FD_SNAPSHOT_ACC_VEC_ALIGN,
                                                                             sizeof(fd_snapshot_acc_vec_t) * accounts_db->storages[ i ].account_vecs_len );
    accounts_db->storages[ i ].account_vecs[ 0 ].file_sz = 0UL;  /* filled in during pass two */
    accounts_db->storages[ i ].account_vecs[ 0 ].id      = i + 1UL;
    accounts_db->storages[ i ].slot                      = snapshot_ctx->slot - i;
  }

  /* At this point we have iterated through all of the accounts and created
     the index. We are now ready to generate a snapshot hash. For both
     snapshots we need to generate two hashes:
     1. The accounts hash. This is a simple hash of all of the accounts
        included in the snapshot.
     2. The snapshot hash. This is a hash of the accounts hash and the epoch
        account hash. If the EAH is not included, then the accounts hash ==
        snapshot hash.

    There is some nuance as to which hash goes where. For full snapshots,
    the accounts hash in the bank hash info is the accounts hash. The hash in
    the filename is the snapshot hash.

    For incremental snapshots, the account hash in the bank hash info field is
    left zeroed out. The full snapshot's hash is in the incremental persistence
    field. The incremental snapshot's accounts hash is included in the
    incremental persistence field. The hash in the filename is the snapshot
    hash. */

  int err;
  if( !snapshot_ctx->is_incremental ) {
    err = fd_snapshot_service_hash( &snapshot_ctx->acc_hash,
                                    &snapshot_ctx->snap_hash,
                                    &snapshot_ctx->slot_bank,
                                    &snapshot_ctx->epoch_bank,
                                    snapshot_ctx->acc_mgr->funk,
                                    snapshot_ctx->tpool,
                                    snapshot_ctx->valloc );
    accounts_db->bank_hash_info.accounts_hash = snapshot_ctx->acc_hash;
  } else {
    err = fd_snapshot_service_inc_hash( &snapshot_ctx->acc_hash,
                                        &snapshot_ctx->snap_hash,
                                        &snapshot_ctx->slot_bank,
                                        &snapshot_ctx->epoch_bank,
                                        snapshot_ctx->acc_mgr->funk,
                                        incremental_keys,
                                        incremental_key_cnt,
                                        snapshot_ctx->valloc );
    /* Key list is only needed for the incremental hash; release it now. */
    fd_valloc_free( snapshot_ctx->valloc, incremental_keys );

    /* Per the scheme above, incremental snapshots leave this field zeroed. */
    fd_memset( &accounts_db->bank_hash_info.accounts_hash, 0, sizeof(fd_hash_t) );
  }

  /* NOTE(review): hashes are logged before err is checked; on failure the
     logged values may be garbage (harmless since FD_LOG_ERR aborts next). */
  FD_LOG_NOTICE(( "Hashes calculated acc_hash=%s snapshot_hash=%s",
                  FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->acc_hash),
                  FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) ));

  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to calculate snapshot hash" ));
  }

  fd_memset( &accounts_db->bank_hash_info.stats, 0, sizeof(fd_bank_hash_stats_t) );

  /* Now, we have calculated the relevant hashes for the accounts.
     Because the files are serially written out for tar and we need to prepend
     the manifest, we must reserve space in the archive for the solana manifest. */

  if( snapshot_ctx->is_incremental ) {
    /* Must exist before sizing the manifest so the serialized size below
       accounts for the incremental persistence field. */
    manifest->bank_incremental_snapshot_persistence = fd_valloc_malloc( snapshot_ctx->valloc,
                                                                        FD_BANK_INCREMENTAL_SNAPSHOT_PERSISTENCE_ALIGN,
                                                                        sizeof(fd_bank_incremental_snapshot_persistence_t) );
  }

  ulong manifest_sz = fd_solana_manifest_serializable_size( manifest );

  char buffer[ FD_SNAPSHOT_DIR_MAX ];
  /* NOTE(review): only err<0 is checked — snprintf truncation (return value
     >= FD_SNAPSHOT_DIR_MAX) would pass silently here and below.  TODO confirm
     FD_SNAPSHOT_DIR_MAX is large enough for any u64 slot pair. */
  err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "snapshots/%lu/%lu", snapshot_ctx->slot, snapshot_ctx->slot );
  if( FD_UNLIKELY( err<0 ) ) {
    FD_LOG_ERR(( "Unable to format manifest name string" ));
  }

  err = fd_tar_writer_new_file( writer, buffer );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to create snapshot manifest file" ));
  }

  /* TODO: We want to eliminate having to write back into the tar file. This
     will enable the snapshot service to only use one file per snapshot.
     In order to do this, we must precompute the index in the manifest
     completely. This will allow us to stream out a compressed snapshot. */

  err = fd_tar_writer_make_space( writer, manifest_sz );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to make space for snapshot manifest file" ));
  }

  err = fd_tar_writer_fini_file( writer );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to finalize snapshot manifest file" ));
  }

  /* We have made space for the manifest and are ready to append the append
     vec files directly into the tar archive. We will iterate through all of
     the records in the funk root and create/populate an append vec for
     previous slots. Just record the pubkeys for the latest slot to populate
     the append vec after. If the append vec is full, write into the next one. */

  /* storages[0] is reserved for the snapshot slot (written last); the
     "previous slots" append vecs start at index 1. */
  ulong curr_slot = 1UL;
  fd_snapshot_acc_vec_t * prev_accs = &accounts_db->storages[ curr_slot ].account_vecs[ 0UL ];

  err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
  if( FD_UNLIKELY( err<0 ) ) {
    FD_LOG_ERR(( "Unable to format previous accounts name string" ));
  }

  err = fd_tar_writer_new_file( writer, buffer );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to create previous accounts file" ));
  }

  /* Tombstone collection only happens for FULL snapshots (allocated here as
     NULL when incremental); tombstones_cnt is reset and reused from this
     point on as the count of actual tombstone records. */
  fd_funk_rec_t * * tombstones = snapshot_ctx->is_incremental ? NULL :
                                 fd_valloc_malloc( snapshot_ctx->valloc, alignof(fd_funk_rec_t*), sizeof(fd_funk_rec_t*) * tombstones_cnt );
  tombstones_cnt = 0UL;

  for( fd_funk_rec_t const * rec = fd_funk_txn_first_rec( funk, NULL ); NULL != rec; rec = fd_funk_txn_next_rec( funk, rec ) ) {

    /* Get the account data. */

    if( !fd_funk_key_is_acc( rec->pair.key ) ) {
      continue;
    }

    fd_pubkey_t const * pubkey       = fd_type_pun_const( rec->pair.key[0].uc );
    int                 is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
    uchar const *       raw          = fd_funk_val( rec, fd_funk_wksp( funk ) );
    fd_account_meta_t * metadata     = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
                                                      (fd_account_meta_t*)raw;

    if( !snapshot_ctx->is_incremental && is_tombstone ) {
      /* If we are in a full snapshot, we need to gather all of the accounts
         that we plan on deleting. */
      tombstones[ tombstones_cnt++ ] = (fd_funk_rec_t*)rec;
    }

    if( !metadata ) {
      continue;
    }

    if( metadata->magic!=FD_ACCOUNT_META_MAGIC ) {
      continue;
    }

    /* Don't iterate through accounts that were touched before the last full
       snapshot. */
    if( snapshot_ctx->is_incremental && metadata->slot<=snapshot_ctx->last_snap_slot ) {
      continue;
    }

    uchar const * acc_data = raw + metadata->hlen;

    /* All accounts that were touched in the snapshot slot should be in
       a different append vec so that Agave can calculate the snapshot slot's
       bank hash. We don't want to include them in an arbitrary append vec. */

    if( metadata->slot==snapshot_ctx->slot ) {
      snapshot_slot_keys[ snapshot_slot_key_cnt++ ] = (fd_pubkey_t*)pubkey;
      continue;
    }

    /* We don't want to iterate over tombstones if the snapshot is not
       incremental */
    if( !snapshot_ctx->is_incremental && is_tombstone ) {
      continue;
    }

    ulong new_sz = prev_accs->file_sz + sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );

    if( new_sz>FD_SNAPSHOT_APPEND_VEC_SZ_MAX ) {

      /* When the current append vec is full, finish writing it, start writing
         into the next append vec. */

      err = fd_tar_writer_fini_file( writer );
      if( FD_UNLIKELY( err ) ) {
        FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
      }

      prev_accs = &accounts_db->storages[ ++curr_slot ].account_vecs[ 0UL ];

      err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot - curr_slot, prev_accs->id );
      if( FD_UNLIKELY( err<0 ) ) {
        FD_LOG_ERR(( "Unable to format previous accounts name string" ));
      }

      err = fd_tar_writer_new_file( writer, buffer );
      if( FD_UNLIKELY( err ) ) {
        FD_LOG_ERR(( "Unable to create previous accounts file" ));
      }
    }

    prev_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );


    /* Write out the header. */

    fd_solana_account_hdr_t header = {0};
    /* Stored meta */
    header.meta.write_version_obsolete = 0UL;
    header.meta.data_len               = metadata->dlen;
    fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
    /* Account Meta */
    header.info.lamports               = metadata->info.lamports;
    /* Zero-lamport accounts serialize with rent_epoch 0. */
    header.info.rent_epoch             = header.info.lamports ? metadata->info.rent_epoch : 0UL;
    fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
    header.info.executable             = metadata->info.executable;
    /* Hash */
    fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );

    err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
    }

    /* Write out the file data. */

    err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
    }

    /* Pad the entry out to FD_SNAPSHOT_ACC_ALIGN with zeros. */
    ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
    err = fd_tar_writer_write_file_data( writer, padding, align_sz );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR( ("Unable to stream out account padding to tar archive" ));
    }
  }

  err = fd_tar_writer_fini_file( writer );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to finalize previous accounts file" ));
  }

  /* Now write out the append vec for the snapshot slot. Again, this is needed
     because the snapshot slot's accounts must be in their append vec in order
     to verify the bank hash for the snapshot slot in the Agave client. */

  fd_snapshot_acc_vec_t * curr_accs = &accounts_db->storages[ 0UL ].account_vecs[ 0UL ];
  err = snprintf( buffer, FD_SNAPSHOT_DIR_MAX, "accounts/%lu.%lu", snapshot_ctx->slot, curr_accs->id );
  if( FD_UNLIKELY( err<0 ) ) {
    FD_LOG_ERR(( "Unable to format current accounts name string" ));
  }

  err = fd_tar_writer_new_file( writer, buffer );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to create current accounts file" ));
  }

  for( ulong i=0UL; i<snapshot_slot_key_cnt; i++ ) {

    /* Re-query each snapshot-slot account by key; the record must still be
       present since nothing has been forgotten between the passes. */
    fd_pubkey_t const * pubkey = snapshot_slot_keys[i];
    fd_funk_rec_key_t key = fd_acc_funk_key( pubkey );

    fd_funk_rec_t const * rec = fd_funk_rec_query( funk, NULL, &key );
    if( FD_UNLIKELY( !rec ) ) {
      FD_LOG_ERR(( "Previously found record can no longer be found" ));
    }

    int                 is_tombstone = rec->flags & FD_FUNK_REC_FLAG_ERASE;
    uchar       const * raw          = fd_funk_val( rec, fd_funk_wksp( funk ) );
    fd_account_meta_t * metadata     = is_tombstone ? fd_snapshot_create_get_default_meta( fd_funk_rec_get_erase_data( rec ) ) :
                                                      (fd_account_meta_t*)raw;

    if( FD_UNLIKELY( !metadata ) ) {
      FD_LOG_ERR(( "Record should have non-NULL metadata" ));
    }

    if( FD_UNLIKELY( metadata->magic!=FD_ACCOUNT_META_MAGIC ) ) {
      FD_LOG_ERR(( "Record should have valid magic" ));
    }

    uchar const * acc_data = raw + metadata->hlen;

    curr_accs->file_sz += sizeof(fd_solana_account_hdr_t) + fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN );

    /* Write out the header (same layout as the previous-slots pass above). */
    fd_solana_account_hdr_t header = {0};
    /* Stored meta */
    header.meta.write_version_obsolete = 0UL;
    header.meta.data_len               = metadata->dlen;
    fd_memcpy( header.meta.pubkey, pubkey, sizeof(fd_pubkey_t) );
    /* Account Meta */
    header.info.lamports               = metadata->info.lamports;
    header.info.rent_epoch             = header.info.lamports ? metadata->info.rent_epoch : 0UL;
    fd_memcpy( header.info.owner, metadata->info.owner, sizeof(fd_pubkey_t) );
    header.info.executable             = metadata->info.executable;
    /* Hash */
    fd_memcpy( &header.hash, metadata->hash, sizeof(fd_hash_t) );


    err = fd_tar_writer_write_file_data( writer, &header, sizeof(fd_solana_account_hdr_t) );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Unable to stream out account header to tar archive" ));
    }
    err = fd_tar_writer_write_file_data( writer, acc_data, metadata->dlen );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Unable to stream out account data to tar archive" ));
    }
    ulong align_sz = fd_ulong_align_up( metadata->dlen, FD_SNAPSHOT_ACC_ALIGN ) - metadata->dlen;
    err = fd_tar_writer_write_file_data( writer, padding, align_sz );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Unable to stream out account padding to tar archive" ));
    }
  }

  err = fd_tar_writer_fini_file( writer );
  if( FD_UNLIKELY( err ) ) {
    FD_LOG_ERR(( "Unable to finish writing out file" ));
  }

  /* TODO: At this point we must implement compaction to the snapshot service.
     Without this, we are actually not cleaning up any tombstones from funk. */

  /* NOTE(review): this condition looks inverted.  `tombstones` is allocated
     and populated only when !is_incremental (see above), yet the forget is
     gated on is_incremental — so in the incremental case this passes a NULL
     array (with cnt 0) and in the full case the gathered tombstones are never
     forgotten.  TODO confirm intended polarity. */
  if( snapshot_ctx->is_incremental ) {
    fd_funk_start_write( funk );
    err = fd_funk_rec_forget( funk, tombstones, tombstones_cnt );
    if( FD_UNLIKELY( err!=FD_FUNK_SUCCESS ) ) {
      FD_LOG_ERR(( "Unable to forget tombstones" ));
    }
    FD_LOG_NOTICE(( "Compacted %lu tombstone records", tombstones_cnt ));
    fd_funk_end_write( funk );
  }

  /* `tombstones` may be NULL here (incremental case); freeing NULL is
     presumably a no-op for the valloc — matches free() semantics. */
  fd_valloc_free( snapshot_ctx->valloc, snapshot_slot_keys );
  fd_valloc_free( snapshot_ctx->valloc, tombstones );

}
     494             : 
/* Convert the runtime's in-memory stakes cache (old_stakes) into the
   Agave-compatible serializable form (new_stakes).  Vote account contents
   are re-read from the accounts db via the acc mgr because the runtime's
   decoded vote state cannot be re-encoded into the on-chain account bytes.
   Stale stake delegations are evicted from / refreshed in old_stakes
   in-place, and the delegation pool/root are then shared (not copied) with
   new_stakes.  Aborts via FD_LOG_ERR on any lookup/decode failure. */
static void
fd_snapshot_create_serialiable_stakes( fd_snapshot_ctx_t        * snapshot_ctx,
                                       fd_stakes_t              * old_stakes,
                                       fd_stakes_serializable_t * new_stakes ) {

  /* The deserialized stakes cache that is used by the runtime can't be
     reserialized into the format that Agave uses. For every vote account
     in the stakes struct, the Firedancer client holds a decoded copy of the
     vote state. However, this vote state can't be reserialized back into the
     full vote account data.

     This poses a problem in the Agave client because upon boot, Agave
     verifies that for all of the vote accounts in the stakes struct, the data
     in the cache is the same as the data in the accounts db.

     The other problem is that the Firedancer stakes cache does not evict old
     entries and doesn't update delegations within the cache. The cache will
     just insert new pubkeys as stake accounts are created/delegated to. To
     make the cache conformant for the snapshot, old accounts should be removed
     from the snapshot and all of the delegations should be updated. */

  /* First populate the vote accounts using the vote accounts/stakes cache.
     We can populate over all of the fields except we can't reserialize the
     vote account data. Instead we will copy over the raw contents of all of
     the vote accounts. */

  /* NOTE(review): 15000 appears to be a minimum pool-size floor, presumably
     headroom for the vote account set — confirm against pool sizing elsewhere. */
  ulong vote_accounts_len                      = fd_vote_accounts_pair_t_map_size( old_stakes->vote_accounts.vote_accounts_pool, old_stakes->vote_accounts.vote_accounts_root );
  new_stakes->vote_accounts.vote_accounts_pool = fd_vote_accounts_pair_serializable_t_map_alloc( snapshot_ctx->valloc, fd_ulong_max(vote_accounts_len, 15000 ) );
  new_stakes->vote_accounts.vote_accounts_root = NULL;

  for( fd_vote_accounts_pair_t_mapnode_t * n = fd_vote_accounts_pair_t_map_minimum(
       old_stakes->vote_accounts.vote_accounts_pool,
       old_stakes->vote_accounts.vote_accounts_root );
       n;
       n = fd_vote_accounts_pair_t_map_successor( old_stakes->vote_accounts.vote_accounts_pool, n ) ) {

    fd_vote_accounts_pair_serializable_t_mapnode_t * new_node = fd_vote_accounts_pair_serializable_t_map_acquire( new_stakes->vote_accounts.vote_accounts_pool );
    new_node->elem.key   = n->elem.key;
    new_node->elem.stake = n->elem.stake;
    /* Now to populate the value, lookup the account using the acc mgr */
    FD_BORROWED_ACCOUNT_DECL( vote_acc );
    int err = fd_acc_mgr_view( snapshot_ctx->acc_mgr, NULL, &n->elem.key, vote_acc );
    if( FD_UNLIKELY( err ) ) {
      FD_LOG_ERR(( "Failed to view vote account from stakes cache %s", FD_BASE58_ENC_32_ALLOCA(&n->elem.key) ));
    }

    /* Copy the raw account bytes and metadata; data is heap-copied so the
       serializable struct owns its buffer.  NOTE(review): the malloc result
       is not checked before fd_memcpy — presumably this valloc aborts on
       failure, confirm. */
    new_node->elem.value.lamports   = vote_acc->const_meta->info.lamports;
    new_node->elem.value.data_len   = vote_acc->const_meta->dlen;
    new_node->elem.value.data       = fd_valloc_malloc( snapshot_ctx->valloc, 8UL, vote_acc->const_meta->dlen );
    fd_memcpy( new_node->elem.value.data, vote_acc->const_data, vote_acc->const_meta->dlen );
    fd_memcpy( &new_node->elem.value.owner, &vote_acc->const_meta->info.owner, sizeof(fd_pubkey_t) );
    new_node->elem.value.executable = vote_acc->const_meta->info.executable;
    new_node->elem.value.rent_epoch = vote_acc->const_meta->info.rent_epoch;
    fd_vote_accounts_pair_serializable_t_map_insert( new_stakes->vote_accounts.vote_accounts_pool, &new_stakes->vote_accounts.vote_accounts_root, new_node );

  }

  /* Stale stake delegations should also be removed or updated in the cache.
     TODO: This will likely be changed in the near future as the stake
     program is migrated to a bpf program. It will likely be replaced by an
     index of stake/vote accounts. */

  /* nn caches the successor before any removal so iteration survives
     deleting the current node. */
  FD_BORROWED_ACCOUNT_DECL( stake_acc );
  fd_delegation_pair_t_mapnode_t *      nn = NULL;
  for( fd_delegation_pair_t_mapnode_t * n  = fd_delegation_pair_t_map_minimum(
      old_stakes->stake_delegations_pool, old_stakes->stake_delegations_root ); n; n=nn ) {

    nn = fd_delegation_pair_t_map_successor( old_stakes->stake_delegations_pool, n );

    int err = fd_acc_mgr_view( snapshot_ctx->acc_mgr, NULL, &n->elem.account, stake_acc );
    if( FD_UNLIKELY( err ) ) {
      /* If the stake account doesn't exist, the cache is stale and the entry
         just needs to be evicted. */
      fd_delegation_pair_t_map_remove( old_stakes->stake_delegations_pool, &old_stakes->stake_delegations_root, n );
      fd_delegation_pair_t_map_release( old_stakes->stake_delegations_pool, n );
    } else {
      /* Otherwise, just update the delegation in case it is stale. */
      fd_bincode_decode_ctx_t ctx = {
        .data    = stake_acc->const_data,
        .dataend = stake_acc->const_data + stake_acc->const_meta->dlen,
        .valloc  = snapshot_ctx->valloc
      };
      fd_stake_state_v2_t stake_state = {0};
      err = fd_stake_state_v2_decode( &stake_state, &ctx );
      if( FD_UNLIKELY( err ) ) {
        FD_LOG_ERR(( "Failed to decode stake state" ));
      }
      /* NOTE(review): reads inner.stake.stake.delegation without checking
         the decoded variant; an initialized-but-undelegated stake account
         would yield a garbage delegation here — confirm the cache only
         contains delegated accounts. */
      n->elem.delegation = stake_state.inner.stake.stake.delegation;
    }
  }

  /* Copy over the rest of the fields as they are the same.  Note these are
     shallow copies: the delegation pool/root are now shared between
     old_stakes and new_stakes. */

  new_stakes->stake_delegations_pool = old_stakes->stake_delegations_pool;
  new_stakes->stake_delegations_root = old_stakes->stake_delegations_root;
  new_stakes->unused                 = old_stakes->unused;
  new_stakes->epoch                  = old_stakes->epoch;
  new_stakes->stake_history          = old_stakes->stake_history;

}
     595             : 
/* Populate the serializable versioned bank (the "bank" portion of the
   Solana snapshot manifest) from the snapshot context's slot bank and
   epoch bank.  Heap-allocates the blockhash queue entries, hashes_per_tick,
   and the two relevant epoch-stakes pairs from snapshot_ctx->valloc; the
   caller/serializer owns those allocations. */
static inline void
fd_snapshot_create_populate_bank( fd_snapshot_ctx_t *                snapshot_ctx,
                                  fd_serializable_versioned_bank_t * bank ) {

  fd_slot_bank_t  * slot_bank  = &snapshot_ctx->slot_bank;
  fd_epoch_bank_t * epoch_bank = &snapshot_ctx->epoch_bank;

  /* The blockhash queue has to be copied over along with all of its entries.
     As a note, the size is 300 but in fact is of size 301 due to a known bug
     in the agave client that is emulated by the firedancer client. */

  bank->blockhash_queue.last_hash_index = slot_bank->block_hash_queue.last_hash_index;
  bank->blockhash_queue.last_hash       = fd_valloc_malloc( snapshot_ctx->valloc, FD_HASH_ALIGN, FD_HASH_FOOTPRINT );
  fd_memcpy( bank->blockhash_queue.last_hash, slot_bank->block_hash_queue.last_hash, sizeof(fd_hash_t) );

  bank->blockhash_queue.ages_len = fd_hash_hash_age_pair_t_map_size( slot_bank->block_hash_queue.ages_pool, slot_bank->block_hash_queue.ages_root);
  bank->blockhash_queue.ages     = fd_valloc_malloc( snapshot_ctx->valloc, FD_HASH_HASH_AGE_PAIR_ALIGN, bank->blockhash_queue.ages_len * sizeof(fd_hash_hash_age_pair_t) );
  bank->blockhash_queue.max_age  = FD_BLOCKHASH_QUEUE_SIZE;

  /* Flatten the age map into the contiguous ages array, in key order. */
  fd_block_hash_queue_t             * queue               = &slot_bank->block_hash_queue;
  fd_hash_hash_age_pair_t_mapnode_t * nn                  = NULL;
  ulong                               blockhash_queue_idx = 0UL;
  for( fd_hash_hash_age_pair_t_mapnode_t * n = fd_hash_hash_age_pair_t_map_minimum( queue->ages_pool, queue->ages_root ); n; n = nn ) {
    nn = fd_hash_hash_age_pair_t_map_successor( queue->ages_pool, n );
    fd_memcpy( &bank->blockhash_queue.ages[ blockhash_queue_idx++ ], &n->elem, sizeof(fd_hash_hash_age_pair_t) );
  }



  /* Ancestor can be omitted to boot off of for both clients */

  bank->ancestors_len                         = 0UL;
  bank->ancestors                             = NULL;

  bank->hash                                  = slot_bank->banks_hash;
  bank->parent_hash                           = slot_bank->prev_banks_hash;
  bank->parent_slot                           = slot_bank->prev_slot;
  bank->hard_forks                            = slot_bank->hard_forks;
  bank->transaction_count                     = slot_bank->transaction_count;
  bank->signature_count                       = slot_bank->parent_signature_cnt;
  bank->capitalization                        = slot_bank->capitalization;
  bank->tick_height                           = slot_bank->tick_height;
  bank->max_tick_height                       = slot_bank->max_tick_height;

  /* The hashes_per_tick needs to be copied over from the epoch bank because
     the pointer could go out of bounds during an epoch boundary. */
  bank->hashes_per_tick                       = fd_valloc_malloc( snapshot_ctx->valloc, alignof(ulong), sizeof(ulong) );
  fd_memcpy( bank->hashes_per_tick, &epoch_bank->hashes_per_tick, sizeof(ulong) );

  bank->ticks_per_slot                        = FD_TICKS_PER_SLOT;
  bank->ns_per_slot                           = epoch_bank->ns_per_slot;
  bank->genesis_creation_time                 = epoch_bank->genesis_creation_time;
  bank->slots_per_year                        = epoch_bank->slots_per_year;

  /* This value can be set to 0 because the Agave client recomputes this value
     and the firedancer client doesn't use it. */

  bank->accounts_data_len                     = 0UL;

  bank->slot                                  = snapshot_ctx->slot;
  bank->epoch                                 = fd_slot_to_epoch( &epoch_bank->epoch_schedule, bank->slot, NULL );
  bank->block_height                          = slot_bank->block_height;

  /* Collector id can be left as null for both clients */

  fd_memset( &bank->collector_id, 0, sizeof(fd_pubkey_t) );

  bank->collector_fees                        = slot_bank->collected_execution_fees + slot_bank->collected_priority_fees;
  bank->fee_calculator.lamports_per_signature = slot_bank->lamports_per_signature;
  bank->fee_rate_governor                     = slot_bank->fee_rate_governor;
  bank->collected_rent                        = slot_bank->collected_rent;

  bank->rent_collector.epoch                  = bank->epoch;
  bank->rent_collector.epoch_schedule         = epoch_bank->rent_epoch_schedule;
  bank->rent_collector.slots_per_year         = epoch_bank->slots_per_year;
  bank->rent_collector.rent                   = epoch_bank->rent;

  bank->epoch_schedule                        = epoch_bank->epoch_schedule;
  bank->inflation                             = epoch_bank->inflation;

  /* Unused accounts can be left as NULL for both clients. */

  fd_memset( &bank->unused_accounts, 0, sizeof(fd_unused_accounts_t) );

  /* We need to copy over the stakes for two epochs despite the Agave client
     providing the stakes for 6 epochs. These stakes need to be copied over
     because of the fact that the leader schedule computation uses the two
     previous epoch stakes.

     TODO: This field has been deprecated by agave and has instead been
     replaced with the versioned epoch stakes field in the manifest. The
     firedancer client will populate the deprecated field. */

  fd_epoch_epoch_stakes_pair_t * relevant_epoch_stakes = fd_valloc_malloc( snapshot_ctx->valloc, FD_EPOCH_EPOCH_STAKES_PAIR_ALIGN, 2UL * sizeof(fd_epoch_epoch_stakes_pair_t) );
  fd_memset( &relevant_epoch_stakes[0], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
  fd_memset( &relevant_epoch_stakes[1], 0UL, sizeof(fd_epoch_epoch_stakes_pair_t) );
  relevant_epoch_stakes[0].key                        = bank->epoch;
  relevant_epoch_stakes[0].value.stakes.vote_accounts = slot_bank->epoch_stakes;
  relevant_epoch_stakes[1].key                        = bank->epoch+1UL;
  relevant_epoch_stakes[1].value.stakes.vote_accounts = epoch_bank->next_epoch_stakes;

  bank->epoch_stakes_len = 2UL;
  bank->epoch_stakes     = relevant_epoch_stakes;
  bank->is_delta         = snapshot_ctx->is_incremental;

  /* The firedancer runtime currently maintains a version of the stakes which
     can't be reserialized into a format that is compatible with the Solana
     snapshot format. Therefore, we must recompute the data structure using
     the pubkeys from the stakes cache that is currently in the epoch context. */

  fd_snapshot_create_serialiable_stakes( snapshot_ctx, &epoch_bank->stakes, &bank->stakes );

}
     709             : 
/* Prepare the snapshot context for snapshot creation: build the account
   manager, load and decode the epoch bank and slot bank records out of
   funk, validate the requested snapshot slot and output directory, and
   reset (seek + truncate) the two scratch file descriptors.  Aborts via
   FD_LOG_ERR on any failure. */
static inline void
fd_snapshot_create_setup_and_validate_ctx( fd_snapshot_ctx_t * snapshot_ctx ) {

  fd_funk_t * funk = snapshot_ctx->funk;

  /* Initialize the account manager. */

  uchar * mem = fd_valloc_malloc( snapshot_ctx->valloc, FD_ACC_MGR_ALIGN, FD_ACC_MGR_FOOTPRINT );
  snapshot_ctx->acc_mgr = fd_acc_mgr_new( mem, funk );
  if( FD_UNLIKELY( !snapshot_ctx->acc_mgr ) ) {
    FD_LOG_ERR(( "Failed to initialize account manager" ));
  }

  /* First the epoch bank. */

  fd_funk_rec_key_t     epoch_id  = fd_runtime_epoch_bank_key();
  fd_funk_rec_t const * epoch_rec = fd_funk_rec_query( funk, NULL, &epoch_id );
  if( FD_UNLIKELY( !epoch_rec ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: missing record" ));
  }
  void * epoch_val = fd_funk_val( epoch_rec, fd_funk_wksp( funk ) );

  /* Record must at least hold the leading magic word. */
  if( FD_UNLIKELY( fd_funk_val_sz( epoch_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read epoch bank record: empty record" ));
  }

  uint epoch_magic = *(uint*)epoch_val;

  /* Bincode-encoded payload follows the magic word. */
  fd_bincode_decode_ctx_t epoch_decode_ctx = {
    .data    = (uchar*)epoch_val + sizeof(uint),
    .dataend = (uchar*)epoch_val + fd_funk_val_sz( epoch_rec ),
    .valloc  = snapshot_ctx->valloc
  };

  if( FD_UNLIKELY( epoch_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Epoch bank record has wrong magic" ));
  }

  int err = fd_epoch_bank_decode( &snapshot_ctx->epoch_bank, &epoch_decode_ctx );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode epoch bank" ));
  }

  /* Now the slot bank (same record layout: magic word + bincode payload). */

  fd_funk_rec_key_t     slot_id  = fd_runtime_slot_bank_key();
  fd_funk_rec_t const * slot_rec = fd_funk_rec_query( funk, NULL, &slot_id );
  if( FD_UNLIKELY( !slot_rec ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: missing record" ));
  }
  void * slot_val = fd_funk_val( slot_rec, fd_funk_wksp( funk ) );

  if( FD_UNLIKELY( fd_funk_val_sz( slot_rec )<sizeof(uint) ) ) {
    FD_LOG_ERR(( "Failed to read slot bank record: empty record" ));
  }

  uint slot_magic = *(uint*)slot_val;

  fd_bincode_decode_ctx_t slot_decode_ctx = {
    .data    = (uchar*)slot_val + sizeof(uint),
    .dataend = (uchar*)slot_val + fd_funk_val_sz( slot_rec ),
    .valloc  = snapshot_ctx->valloc
  };

  if( FD_UNLIKELY( slot_magic!=FD_RUNTIME_ENC_BINCODE ) ) {
    FD_LOG_ERR(( "Slot bank record has wrong magic" ));
  }

  err = fd_slot_bank_decode( &snapshot_ctx->slot_bank, &slot_decode_ctx );
  if( FD_UNLIKELY( err!=FD_BINCODE_SUCCESS ) ) {
    FD_LOG_ERR(( "Failed to decode slot bank" ));
  }

  /* Validate that the snapshot context is setup correctly */

  if( FD_UNLIKELY( !snapshot_ctx->out_dir ) ) {
    FD_LOG_ERR(( "Snapshot directory is not set" ));
  }

  /* Can't snapshot a slot the runtime hasn't reached yet. */
  if( FD_UNLIKELY( snapshot_ctx->slot>snapshot_ctx->slot_bank.slot ) ) {
    FD_LOG_ERR(( "Snapshot slot=%lu is greater than the current slot=%lu", 
                     snapshot_ctx->slot, snapshot_ctx->slot_bank.slot ));
  }

  /* Truncate the two files used for snapshot creation and seek to its start.
     lseek to offset 0 returns the resulting offset (0) on success and -1 on
     error, so any nonzero return here indicates failure. */

  long seek = lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->tmp_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the temporary file" ));
  }

  seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
  if( FD_UNLIKELY( seek ) ) {
    FD_LOG_ERR(( "Failed to seek to the start of the file" ));
  }

  if( FD_UNLIKELY( ftruncate( snapshot_ctx->snapshot_fd, 0UL ) < 0 ) ) {
    FD_LOG_ERR(( "Failed to truncate the snapshot file" ));
  }

}
     815             : 
     816             : static inline void
     817           0 : fd_snapshot_create_setup_writer( fd_snapshot_ctx_t * snapshot_ctx ) {
     818             :   
     819             :   /* Setup a tar writer. */
     820             : 
     821           0 :   uchar * writer_mem   = fd_valloc_malloc( snapshot_ctx->valloc, fd_tar_writer_align(), fd_tar_writer_footprint() );
     822           0 :   snapshot_ctx->writer = fd_tar_writer_new( writer_mem, snapshot_ctx->tmp_fd );
     823           0 :   if( FD_UNLIKELY( !snapshot_ctx->writer ) ) {
     824           0 :     FD_LOG_ERR(( "Unable to create a tar writer" ));
     825           0 :   }
     826           0 : }
     827             : 
     828             : static inline void
     829           0 : fd_snapshot_create_write_version( fd_snapshot_ctx_t * snapshot_ctx ) {
     830             : 
     831             :   /* The first file in the tar archive should be the version file.. */
     832             : 
     833           0 :   int err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_VERSION_FILE );
     834           0 :   if( FD_UNLIKELY( err ) ) {
     835           0 :     FD_LOG_ERR(( "Failed to create the version file" ));
     836           0 :   }
     837             : 
     838           0 :   err = fd_tar_writer_write_file_data( snapshot_ctx->writer, FD_SNAPSHOT_VERSION, FD_SNAPSHOT_VERSION_LEN);
     839           0 :   if( FD_UNLIKELY( err ) ) {
     840           0 :     FD_LOG_ERR(( "Failed to create the version file" ));
     841           0 :   }
     842             : 
     843           0 :   err = fd_tar_writer_fini_file( snapshot_ctx->writer );
     844           0 :   if( FD_UNLIKELY( err ) ) {
     845           0 :     FD_LOG_ERR(( "Failed to create the version file" ));
     846           0 :   }
     847             : 
     848           0 : }
     849             : 
     850             : static inline void
     851           0 : fd_snapshot_create_write_status_cache( fd_snapshot_ctx_t *  snapshot_ctx ) {
     852             : 
     853           0 :   FD_SCRATCH_SCOPE_BEGIN {
     854             : 
     855             :   /* First convert the existing status cache into a snapshot-friendly format. */
     856             : 
     857           0 :   fd_bank_slot_deltas_t slot_deltas_new = {0};
     858           0 :   int err = fd_txncache_get_entries( snapshot_ctx->status_cache,
     859           0 :                            &slot_deltas_new );
     860           0 :   if( FD_UNLIKELY( err ) ) {
     861           0 :     FD_LOG_ERR(( "Failed to get entries from the status cache" ));
     862           0 :   }
     863           0 :   ulong   bank_slot_deltas_sz = fd_bank_slot_deltas_size( &slot_deltas_new );
     864           0 :   uchar * out_status_cache    = fd_valloc_malloc( snapshot_ctx->valloc,
     865           0 :                                                   FD_BANK_SLOT_DELTAS_ALIGN, 
     866           0 :                                                   bank_slot_deltas_sz );
     867           0 :   fd_bincode_encode_ctx_t encode_status_cache = {
     868           0 :     .data    = out_status_cache,
     869           0 :     .dataend = out_status_cache + bank_slot_deltas_sz,
     870           0 :   };
     871           0 :   if( FD_UNLIKELY( fd_bank_slot_deltas_encode( &slot_deltas_new, &encode_status_cache ) ) ) {
     872           0 :     FD_LOG_ERR(( "Failed to encode the status cache" ));
     873           0 :   }
     874             : 
     875             :   /* Now write out the encoded buffer to the tar archive. */
     876             : 
     877           0 :   err = fd_tar_writer_new_file( snapshot_ctx->writer, FD_SNAPSHOT_STATUS_CACHE_FILE );
     878           0 :   if( FD_UNLIKELY( err ) ) {
     879           0 :     FD_LOG_ERR(( "Failed to create the status cache file" ));
     880           0 :   }
     881           0 :   err = fd_tar_writer_write_file_data( snapshot_ctx->writer, out_status_cache, bank_slot_deltas_sz );
     882           0 :   if( FD_UNLIKELY( err ) ) {
     883           0 :     FD_LOG_ERR(( "Failed to create the status cache file" ));
     884           0 :   }
     885           0 :   err = fd_tar_writer_fini_file( snapshot_ctx->writer );
     886           0 :   if( FD_UNLIKELY( err ) ) {
     887           0 :     FD_LOG_ERR(( "Failed to create the status cache file" ));
     888           0 :   }
     889             : 
     890             :   /* Registers all roots and unconstipates the status cache. */
     891             : 
     892           0 :   fd_txncache_flush_constipated_slots( snapshot_ctx->status_cache );
     893             : 
     894           0 :   fd_valloc_free( snapshot_ctx->valloc, out_status_cache );
     895             : 
     896           0 :   } FD_SCRATCH_SCOPE_END;
     897             : 
     898           0 : }
     899             : 
     900             : static inline void
     901             : fd_snapshot_create_write_manifest_and_acc_vecs( fd_snapshot_ctx_t * snapshot_ctx,
     902             :                                                 fd_hash_t *         out_hash,
     903           0 :                                                 ulong *             out_capitalization ) {
     904             : 
     905             : 
     906           0 :   fd_solana_manifest_serializable_t manifest = {0};
     907             :   
     908             :   /* Copy in all the fields of the bank. */
     909             : 
     910           0 :   fd_snapshot_create_populate_bank( snapshot_ctx, &manifest.bank );
     911             : 
     912             :   /* Populate the rest of the manifest, except for the append vec index. */
     913             : 
     914           0 :   manifest.lamports_per_signature                = snapshot_ctx->slot_bank.lamports_per_signature;
     915           0 :   manifest.epoch_account_hash                    = &snapshot_ctx->slot_bank.epoch_account_hash;
     916             : 
     917             :   /* FIXME: The versioned epoch stakes needs to be implemented. Right now if
     918             :      we try to create a snapshot on or near an epoch boundary, we will produce
     919             :      an invalid snapshot. */
     920             : 
     921           0 :   manifest.versioned_epoch_stakes_len            = 0UL;
     922           0 :   manifest.versioned_epoch_stakes                = NULL;
     923             : 
     924             :   /* Populate the append vec index and write out the corresponding acc files. */
     925             : 
     926           0 :   ulong incr_capitalization = 0UL;
     927           0 :   fd_snapshot_create_populate_acc_vecs( snapshot_ctx, &manifest, snapshot_ctx->writer, &incr_capitalization );
     928             : 
     929             :   /* Once the append vec index is populated and the hashes are calculated, 
     930             :      propogate the hashes to the correct fields. As a note, the last_snap_hash
     931             :      is the full snapshot's account hash. */
     932             : 
     933           0 :   if( snapshot_ctx->is_incremental ) {
     934           0 :     manifest.bank_incremental_snapshot_persistence->full_slot                  = snapshot_ctx->last_snap_slot;
     935           0 :     fd_memcpy( &manifest.bank_incremental_snapshot_persistence->full_hash, snapshot_ctx->last_snap_acc_hash, sizeof(fd_hash_t) );
     936           0 :     manifest.bank_incremental_snapshot_persistence->full_capitalization        = snapshot_ctx->last_snap_capitalization;
     937           0 :     manifest.bank_incremental_snapshot_persistence->incremental_hash           = snapshot_ctx->acc_hash;
     938           0 :     manifest.bank_incremental_snapshot_persistence->incremental_capitalization = incr_capitalization;
     939           0 :   } else {
     940           0 :     memcpy( out_hash, &manifest.accounts_db.bank_hash_info.accounts_hash, sizeof(fd_hash_t) );
     941           0 :     *out_capitalization = snapshot_ctx->slot_bank.capitalization;
     942           0 :   }
     943             : 
     944             :   /* At this point, all of the account files are written out and the append
     945             :      vec index is populated in the manifest. We have already reserved space
     946             :      in the archive for the manifest. All we need to do now is encode the 
     947             :      manifest and write it in. */
     948             : 
     949           0 :   ulong   manifest_sz  = fd_solana_manifest_serializable_size( &manifest ); 
     950           0 :   uchar * out_manifest = fd_valloc_malloc( snapshot_ctx->valloc, FD_SOLANA_MANIFEST_SERIALIZABLE_ALIGN, manifest_sz );
     951             : 
     952           0 :   fd_bincode_encode_ctx_t encode = { 
     953           0 :     .data    = out_manifest,
     954           0 :     .dataend = out_manifest + manifest_sz
     955           0 :   };
     956             : 
     957           0 :   int err = fd_solana_manifest_serializable_encode( &manifest, &encode );
     958           0 :   if( FD_UNLIKELY( err ) ) {
     959           0 :     FD_LOG_ERR(( "Failed to encode the manifest" ));
     960           0 :   }
     961             :   
     962           0 :   err = fd_tar_writer_fill_space( snapshot_ctx->writer, out_manifest, manifest_sz );
     963           0 :   if( FD_UNLIKELY( err ) ) {
     964           0 :     FD_LOG_ERR(( "Failed to write out the manifest" ));
     965           0 :   }
     966             : 
     967           0 :   void * mem = fd_tar_writer_delete( snapshot_ctx->writer );
     968           0 :   if( FD_UNLIKELY( !mem ) ) {
     969           0 :     FD_LOG_ERR(( "Unable to delete the tar writer" ));
     970           0 :   }
     971             : 
     972           0 :   fd_bincode_destroy_ctx_t destroy = {
     973           0 :     .valloc  = snapshot_ctx->valloc
     974           0 :   };
     975             : 
     976             :   /* This is kind of a hack but we need to do this so we don't accidentally 
     977             :      corrupt memory when we try to double destory. Everything below is
     978             :      things that aren't stack allocated from the manifest including the banks. */
     979             : 
     980           0 :   fd_stakes_serializable_destroy( &manifest.bank.stakes, &destroy );
     981           0 :   fd_block_hash_vec_destroy( &manifest.bank.blockhash_queue, &destroy );
     982           0 :   fd_valloc_free( snapshot_ctx->valloc, manifest.bank.epoch_stakes );
     983           0 :   fd_epoch_bank_destroy( &snapshot_ctx->epoch_bank, &destroy );
     984           0 :   fd_slot_bank_destroy( &snapshot_ctx->slot_bank, &destroy );
     985           0 :   if( snapshot_ctx->is_incremental ) {
     986           0 :     fd_valloc_free( snapshot_ctx->valloc, manifest.bank_incremental_snapshot_persistence );
     987           0 :   }
     988           0 :   fd_valloc_free( snapshot_ctx->valloc, out_manifest );  
     989             : 
     990           0 : }
     991             : 
     992             : static inline void
     993           0 : fd_snapshot_create_compress( fd_snapshot_ctx_t * snapshot_ctx ) {
     994             : 
     995             :   /* Compress the file using zstd. First open the non-compressed file and
     996             :      create a file for the compressed file. The reason why we can't do this
     997             :      as we stream out the snapshot archive is that we write back into the
     998             :      manifest buffer. 
     999             :      
    1000             :      TODO: A way to eliminate this and to just stream out
    1001             :      1 compressed file would be to totally precompute the index such that 
    1002             :      we don't have to write back into funk.
    1003             :      
    1004             :      TODO: Currently, the snapshot service interfaces directly with the zstd 
    1005             :      library but a generalized cstream defined in fd_zstd should be used 
    1006             :      instead. */
    1007             : 
    1008           0 :   ulong in_buf_sz   = ZSTD_CStreamInSize();
    1009           0 :   ulong zstd_buf_sz = ZSTD_CStreamOutSize();
    1010           0 :   ulong out_buf_sz  = ZSTD_CStreamOutSize();
    1011             : 
    1012           0 :   char * in_buf   = fd_valloc_malloc( snapshot_ctx->valloc, FD_ZSTD_CSTREAM_ALIGN, in_buf_sz );
    1013           0 :   char * zstd_buf = fd_valloc_malloc( snapshot_ctx->valloc, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
    1014           0 :   char * out_buf  = fd_valloc_malloc( snapshot_ctx->valloc, FD_ZSTD_CSTREAM_ALIGN, out_buf_sz );
    1015             : 
    1016             :   /* Reopen the tarball and open/overwrite the filename for the compressed,
    1017             :      finalized full snapshot. Setup the zstd compression stream. */
    1018             : 
    1019           0 :   int err = 0;
    1020             : 
    1021           0 :   ZSTD_CStream * cstream = ZSTD_createCStream();
    1022           0 :   if( FD_UNLIKELY( !cstream ) ) {
    1023           0 :     FD_LOG_ERR(( "Failed to create the zstd compression stream" ));
    1024           0 :   }
    1025           0 :   ZSTD_initCStream( cstream, ZSTD_CLEVEL_DEFAULT ); 
    1026             : 
    1027           0 :   fd_io_buffered_ostream_t ostream[1];
    1028             : 
    1029           0 :   if( FD_UNLIKELY( !fd_io_buffered_ostream_init( ostream, snapshot_ctx->snapshot_fd, out_buf, out_buf_sz ) ) ) {
    1030           0 :     FD_LOG_ERR(( "Failed to initialize the ostream" ));
    1031           0 :   }
    1032             : 
    1033           0 :   long seek = lseek( snapshot_ctx->snapshot_fd, 0, SEEK_SET );
    1034           0 :   if( FD_UNLIKELY( seek!=0L ) ) {
    1035           0 :     FD_LOG_ERR(( "Failed to seek to the start of the file" ));
    1036           0 :   }
    1037             : 
    1038             :   /* At this point, the tar archive and the new zstd file is open. The zstd
    1039             :      streamer is still open. Now, we are ready to read in bytes and stream
    1040             :      compress them. We will keep going until we see an EOF in a tar archive. */
    1041             : 
    1042           0 :   ulong in_sz = in_buf_sz;
    1043             : 
    1044           0 :   ulong off = (ulong)lseek( snapshot_ctx->tmp_fd, 0, SEEK_SET );
    1045           0 :   if( FD_UNLIKELY( off ) ) {
    1046           0 :     FD_LOG_ERR(( "Failed to seek to the beginning of the file" ));
    1047           0 :   }
    1048             : 
    1049           0 :   while( in_sz==in_buf_sz ) {
    1050             : 
    1051             :     /* Read chunks from the file. There isn't really a need to use a streamed
    1052             :        reader here because we will read in the max size buffer for every single
    1053             :        file read except for the very last one.
    1054             :        
    1055             :        in_sz will only not equal in_buf_sz on the last read. */
    1056           0 :     err = fd_io_read( snapshot_ctx->tmp_fd, in_buf, 0UL, in_buf_sz, &in_sz );
    1057           0 :     if( FD_UNLIKELY( err ) ) {
    1058           0 :       FD_LOG_ERR(( "Failed to read in the file" ));
    1059           0 :     }
    1060             : 
    1061             :     /* Compress the in memory buffer and add it to the output stream. */
    1062             :   
    1063           0 :     ZSTD_inBuffer input = { in_buf, in_sz, 0UL };
    1064           0 :     while( input.pos<input.size ) {
    1065           0 :       ZSTD_outBuffer output = { zstd_buf, zstd_buf_sz, 0UL };
    1066           0 :       ulong          ret    = ZSTD_compressStream( cstream, &output, &input );
    1067             : 
    1068           0 :       if( FD_UNLIKELY( ZSTD_isError( ret ) ) ) {
    1069           0 :         FD_LOG_ERR(( "Compression error: %s\n", ZSTD_getErrorName( ret ) ));
    1070           0 :       }
    1071             : 
    1072           0 :       err = fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
    1073           0 :       if( FD_UNLIKELY( err ) ) {
    1074           0 :         FD_LOG_ERR(( "Failed to write out the compressed file" ));
    1075           0 :       }
    1076           0 :     }
    1077           0 :   }
    1078             : 
    1079             :   /* Now flush any bytes left in the zstd buffer, cleanup open file 
    1080             :      descriptors, and deinit any data structures.  */
    1081             : 
    1082           0 :   ZSTD_outBuffer output    = { zstd_buf, zstd_buf_sz, 0UL };
    1083           0 :   ulong          remaining = ZSTD_endStream(  cstream, &output );
    1084             : 
    1085           0 :   if( FD_UNLIKELY( ZSTD_isError( remaining ) ) ) {
    1086           0 :     FD_LOG_ERR(( "Unable to end the zstd stream" ));
    1087           0 :   }   
    1088           0 :   if( output.pos>0UL ) {
    1089           0 :     fd_io_buffered_ostream_write( ostream, zstd_buf, output.pos );
    1090           0 :   }
    1091             : 
    1092           0 :   fd_valloc_free( snapshot_ctx->valloc, in_buf );
    1093           0 :   fd_valloc_free( snapshot_ctx->valloc, zstd_buf );
    1094           0 :   fd_valloc_free( snapshot_ctx->valloc, out_buf );
    1095             : 
    1096           0 :   ZSTD_freeCStream( cstream ); /* Works even if cstream is null */
    1097           0 :   err = fd_io_buffered_ostream_flush( ostream );
    1098           0 :   if( FD_UNLIKELY( err ) ) {
    1099           0 :     FD_LOG_ERR(( "Failed to flush the ostream" ));
    1100           0 :   }
    1101             : 
    1102             :   /* Assuming that there was a successful write, make the compressed
    1103             :      snapshot file readable and servable. */
    1104             : 
    1105           0 :   char tmp_directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
    1106           0 :   err = snprintf( tmp_directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/%s", snapshot_ctx->out_dir, snapshot_ctx->is_incremental ? FD_SNAPSHOT_TMP_INCR_ARCHIVE_ZSTD : FD_SNAPSHOT_TMP_FULL_ARCHIVE_ZSTD );
    1107           0 :   if( FD_UNLIKELY( err<0 ) ) {
    1108           0 :     FD_LOG_ERR(( "Failed to format directory string" ));
    1109           0 :   }
    1110             : 
    1111           0 :   char directory_buf_zstd[ FD_SNAPSHOT_DIR_MAX ];
    1112           0 :   if( !snapshot_ctx->is_incremental ) {
    1113           0 :     err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/snapshot-%lu-%s.tar.zst", 
    1114           0 :                     snapshot_ctx->out_dir, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
    1115           0 :   } else {
    1116           0 :     err = snprintf( directory_buf_zstd, FD_SNAPSHOT_DIR_MAX, "%s/incremental-snapshot-%lu-%lu-%s.tar.zst", 
    1117           0 :                     snapshot_ctx->out_dir, snapshot_ctx->last_snap_slot, snapshot_ctx->slot, FD_BASE58_ENC_32_ALLOCA(&snapshot_ctx->snap_hash) );
    1118           0 :   }
    1119             : 
    1120           0 :   if( FD_UNLIKELY( err<0 ) ) {
    1121           0 :     FD_LOG_ERR(( "Failed to format directory string" ));
    1122           0 :   }
    1123             : 
    1124           0 :   err = rename( tmp_directory_buf_zstd, directory_buf_zstd );
    1125           0 :   if( FD_UNLIKELY( err<0 ) ) {
    1126           0 :     FD_LOG_ERR(( "Failed to rename file from %s to %s (%i-%s)", tmp_directory_buf_zstd, directory_buf_zstd, errno, fd_io_strerror( errno ) ));
    1127           0 :   }
    1128             : 
    1129           0 : }
    1130             : 
/* fd_snapshot_create_new_snapshot drives end-to-end snapshot creation for
   snapshot_ctx->slot: it validates the context, sets up the tar archive
   writer, writes the version file and status cache, writes the manifest
   plus account append vecs, and finally zstd-compresses the archive into
   snapshot_ctx->out_dir.

   out_hash / out_capitalization are out-params populated while the
   manifest is written (by fd_snapshot_create_write_manifest_and_acc_vecs);
   in this file they are only assigned on the non-incremental path —
   NOTE(review): confirm callers handle the incremental case where they
   are left untouched.

   Every helper below aborts the process via FD_LOG_ERR on failure, so
   reaching the closing notice implies the snapshot was produced
   successfully.  Temporary allocations are scoped to the surrounding
   FD_SCRATCH frame. */
void
fd_snapshot_create_new_snapshot( fd_snapshot_ctx_t * snapshot_ctx, 
                                 fd_hash_t *         out_hash, 
                                 ulong *             out_capitalization ) {

  FD_SCRATCH_SCOPE_BEGIN {

  FD_LOG_NOTICE(( "Starting to produce a snapshot for slot=%lu in directory=%s", snapshot_ctx->slot, snapshot_ctx->out_dir ));

  /* Validate that the snapshot_ctx is setup correctly (fds, slot,
     incremental preconditions, etc.). */

  fd_snapshot_create_setup_and_validate_ctx( snapshot_ctx );

  /* Setup the tar archive writer over the uncompressed tmp file. */

  fd_snapshot_create_setup_writer( snapshot_ctx );

  /* Write out the version file (first entry in the archive). */

  fd_snapshot_create_write_version( snapshot_ctx );

  /* Dump the status cache and append it to the tar archive. */

  fd_snapshot_create_write_status_cache( snapshot_ctx );

  /* Populate and write out the manifest and append vecs.  This also fills
     out_hash / out_capitalization (full-snapshot path). */

  fd_snapshot_create_write_manifest_and_acc_vecs( snapshot_ctx, out_hash, out_capitalization );

  /* Compress the tar file and write it out to the specified directory
     under its final hash-stamped name. */

  fd_snapshot_create_compress( snapshot_ctx );

  FD_LOG_NOTICE(("Finished producing a snapshot" ));

  } FD_SCRATCH_SCOPE_END;
}

Generated by: LCOV version 1.14