Line data Source code
1 : #ifndef HEADER_fd_src_discof_tower_fd_hfork_h 2 : #define HEADER_fd_src_discof_tower_fd_hfork_h 3 : 4 : /* fd_hfork presents an API for detecting hard forks by monitoring 5 : votes from validators and comparing their bank hashes against ours. 6 : 7 : The Solana chain occasionally forks for what we will call soft and 8 : hard reasons. 9 : 10 : - Soft forks occur due to network latency and the distributed 11 : nature of the network. Different validators may see different 12 : blocks at different times, and so may disagree on ledger state 13 : temporarily until the network converges. This is expected and 14 : normal. 15 : 16 : - Hard forks occur when validators have a disagreement about the 17 : rules of the protocol and come to different conclusions about the 18 : permanent state of the ledger. This is not expected or normal. 19 : 20 : For Firedancer, it is useful to be able to detect when any hard fork 21 : has occurred, because it means there was likely some consensus bug 22 : that caused us to diverge from Agave. What we check is actually 23 : broader: it is possible that we disagreed about the result of some 24 : block, but did not diverge from Agave as the block we disagreed on 25 : did not become canonical. We still want to detect this case, as it 26 : indicates a consensus bug even if we did not ultimately diverge. 27 : 28 : We detect hard forked blocks by monitoring votes from validators. The 29 : specific criteria are: 30 : 31 : - 52% of stake has voted on a block_id and agreed on a bank_hash 32 : which is different from the bank_hash we have for that block_id. 33 : 34 : - 52% of stake has voted on a block_id and agreed on a bank_hash 35 : when we marked the block dead because it failed to execute. 36 : 37 : Interestingly we do not need to handle the case where we thought a 38 : block succeeded, but the rest of the cluster failed it, because this 39 : does not cause a hard fork. 
We simply switch to the canonical fork 40 : the rest of the cluster will converge on. It is not really possible 41 : to detect this case, because other nodes do not report that the block 42 : failed, they simply vote somewhere else. 43 : 44 : We are essentially receiving a 45 : 46 : Stream<Either<(VoteAccountPubkey, BlockId, BankHash), EpochStakes>> 47 : 48 : The first variant is a vote from some validator for a certain 49 : block_id and bank_hash. This could come from either a replayed 50 : block or gossip. It does not matter where it comes from, or even if 51 : the source is valid (e.g. it could come from a block which gets 52 : marked dead because it did not validate); all that matters is that the 53 : vote is validly signed by the vote account. 54 : 55 : Internally, we maintain a Map<BlockId, Map<BankHash, StakeWeight>>. 56 : Any time we receive a vote, if it causes an entry in this map to 57 : exceed 52% of the total stake weight, or it is from our own validator 58 : identity, we check if our vote is different, and potentially raise a 59 : warning or error. 60 : 61 : The last max_live_slots votes for each vote account are kept in a 62 : ring buffer and replaced as new votes come in, to prevent unbounded 63 : memory usage. This makes the structure somewhat heuristic: it might 64 : be that if some nodes are very far ahead, and some behind, we might 65 : evict old votes for those ahead and never see a fork exceed 52% in 66 : our window. This is unlikely to happen in practice, and even if it 67 : does, it only means we might miss detecting a hard fork, which is not 68 : catastrophic. The default behavior anyway should be to continue 69 : running on the forked chain. */ 70 : 71 : #include "../fd_choreo_base.h" 72 : 73 : struct fd_hfork; 74 : typedef struct fd_hfork fd_hfork_t; 75 : 76 : FD_PROTOTYPES_BEGIN 77 : 78 : /* fd_hfork_{align,footprint} return the required alignment and 79 : footprint of a memory region suitable for use as a hfork. 
align 80 : returns fd_hfork_ALIGN. footprint returns fd_hfork_FOOTPRINT. */ 81 : 82 : FD_FN_CONST ulong 83 : fd_hfork_align( void ); 84 : /* NOTE(review): footprint takes per_vtr_max and vtr_max, so its value presumably depends on them rather than being a single fixed constant -- confirm. */ 85 : FD_FN_CONST ulong 86 : fd_hfork_footprint( ulong per_vtr_max, 87 : ulong vtr_max ); 88 : 89 : /* fd_hfork_new formats an unused memory region for use as a hfork. mem 90 : is a non-NULL pointer to this region in the local address space with 91 : the required footprint and alignment. NOTE(review): per_vtr_max and vtr_max presumably must match the values given to fd_hfork_footprint, and the role of seed and the return value are not described here -- confirm against the implementation. */ 92 : 93 : void * 94 : fd_hfork_new( void * mem, 95 : ulong per_vtr_max, 96 : ulong vtr_max, 97 : ulong seed ); 98 : 99 : /* fd_hfork_join joins the caller to the hfork. hfork points to the 100 : first byte of the memory region backing the hfork in the caller's 101 : address space. 102 : 103 : Returns a pointer in the local address space to hfork on success. */ 104 : 105 : fd_hfork_t * 106 : fd_hfork_join( void * hfork ); 107 : 108 : /* fd_hfork_leave leaves a current local join. Returns a pointer to the 109 : underlying shared memory region on success and NULL on failure (logs 110 : details). Reasons for failure include hfork being NULL. */ 111 : 112 : void * 113 : fd_hfork_leave( fd_hfork_t const * hfork ); 114 : 115 : /* fd_hfork_delete unformats a memory region used as a hfork. Assumes 116 : only the local process is joined to the region. Returns a pointer to 117 : the underlying shared memory region or NULL if used obviously in 118 : error (e.g. hfork is obviously not a hfork ... logs details). The 119 : ownership of the memory region is transferred to the caller. */ 120 : 121 : void * 122 : fd_hfork_delete( void * hfork ); 123 : 124 : /* fd_hfork_count_vote return codes. 
*/ 125 : 126 0 : #define FD_HFORK_SUCCESS_MATCHED ( 1) /* vote counted, bank hash matches ours */ 127 0 : #define FD_HFORK_SUCCESS ( 0) /* vote counted, bank hash not yet compared against ours */ 128 0 : #define FD_HFORK_ERR_MISMATCHED (-1) /* vote counted, bank hash does not match ours */ 129 3 : #define FD_HFORK_ERR_UNKNOWN_VTR (-2) /* vote not counted: voter not in vtr_map */ 130 3 : #define FD_HFORK_ERR_ALREADY_VOTED (-3) /* vote not counted: voter already voted for this block_id */ 131 3 : #define FD_HFORK_ERR_VOTE_TOO_OLD (-4) /* vote not counted: vote slot not newer than previous */ 132 : 133 : /* fd_hfork_count_vote updates the hard fork detector with a newly 134 : observed vote. Returns FD_HFORK_SUCCESS_MATCHED if counted and the bank hash 135 : matches ours, FD_HFORK_SUCCESS if counted but not yet compared, 136 : FD_HFORK_ERR_MISMATCHED if counted but the bank hash does not match ours, or 137 : one of the other negative FD_HFORK_ERR_* codes (UNKNOWN_VTR, ALREADY_VOTED, VOTE_TOO_OLD) if the vote was not counted. NOTE(review): slot is presumably the slot being voted on, and stake / total_stake presumably the voter's stake and the total stake used for the 52% threshold -- confirm against the implementation. */ 138 : 139 : int 140 : fd_hfork_count_vote( fd_hfork_t * hfork, 141 : fd_pubkey_t const * vote_acc, 142 : fd_hash_t const * block_id, 143 : fd_hash_t const * bank_hash, 144 : ulong slot, 145 : ulong stake, 146 : ulong total_stake ); 147 : 148 : /* fd_hfork_record_our_bank_hash updates the hard fork detector with our 149 : bank hash (computed on replay) for a given block ID. If bank_hash is 150 : NULL, this indicates the block was marked dead during replay and we 151 : did not think it was valid. Returns a flag indicating the outcome: 152 : 1 (match), -1 (mismatch), or 0 (not yet compared). These values are numerically equal to FD_HFORK_SUCCESS_MATCHED, FD_HFORK_ERR_MISMATCHED and FD_HFORK_SUCCESS respectively. */ 153 : 154 : int 155 : fd_hfork_record_our_bank_hash( fd_hfork_t * hfork, 156 : fd_hash_t const * block_id, 157 : fd_hash_t const * bank_hash, 158 : ulong total_stake ); 159 : 160 : /* fd_hfork_update_voters updates the set of voters tracked by the hard 161 : fork detector. Voters not in vote_accs[0..cnt) are removed along 162 : with all their vote entries. New voters are added. This should be 163 : called on each epoch boundary when the stake-weighted voter set 164 : changes. 
vote_accs is an array of vote account addresses of length 165 : cnt. */ 166 : 167 : void 168 : fd_hfork_update_voters( fd_hfork_t * hfork, 169 : fd_pubkey_t const * vote_accs, 170 : ulong cnt ); 171 : 172 : FD_PROTOTYPES_END 173 : 174 : #endif /* HEADER_fd_src_discof_tower_fd_hfork_h */