Line data Source code
1 : #ifndef HEADER_fd_src_flamenco_repair_fd_repair_h
2 : #define HEADER_fd_src_flamenco_repair_fd_repair_h
3 :
4 : #include "../gossip/fd_gossip.h"
5 : #include "../../ballet/shred/fd_shred.h"
6 : #include "../../disco/metrics/generated/fd_metrics_repair.h"
7 : #include "../../disco/metrics/fd_metrics.h"
8 :
9 :
10 : #define FD_REPAIR_DELIVER_FAIL_TIMEOUT -1 /* failure code; by its name, a delivery timed out (users are outside this header -- confirm) */
11 : #define FD_REPAIR_DELIVER_FAIL_REQ_LIMIT_EXCEEDED -2 /* failure code; by its name, a request limit was exceeded (users are outside this header -- confirm) */
12 :
13 : /* Maximum size of a network packet */
14 0 : #define FD_REPAIR_MAX_PACKET_SIZE 1232
15 :
16 : /* Scratch space is used by the repair library to allocate an
17 : active element table and to shuffle that table.
18 : TODO: update comment to reflect the reasoning behind
19 : these constants once they are fully understood and updated. */
20 0 : #define FD_REPAIR_SCRATCH_MAX (1UL << 30UL)
21 0 : #define FD_REPAIR_SCRATCH_DEPTH (1UL << 11UL)
22 :
23 : /* Max number of validators that can be actively queried (sizes the active table in fd_repair_footprint and the peers array in fd_repair_t) */
24 0 : #define FD_ACTIVE_KEY_MAX (1<<12)
25 : /* Max number of pending shred requests (capacity given to fd_inflight_table_footprint in fd_repair_footprint) */
26 0 : #define FD_NEEDED_KEY_MAX (1<<20)
27 : /* Max number of sticky repair peers (length of fd_repair_t's actives_sticky array) */
28 : #define FD_REPAIR_STICKY_MAX 1024
29 : /* Max number of validator identities in stake weights (element count of each of the two stake weight arrays in fd_repair_footprint) */
30 0 : #define FD_STAKE_WEIGHTS_MAX (1<<14)
31 : /* Max number of validator clients that we ping (capacity given to fd_pinged_table_footprint in fd_repair_footprint) */
32 0 : #define FD_REPAIR_PINGED_MAX (1<<14)
33 : /* Sha256 pre-image size for pings */
34 0 : #define FD_PING_PRE_IMAGE_SZ (48UL)
35 : /* Number of peers to send requests to. */
36 0 : #define FD_REPAIR_NUM_NEEDED_PEERS (2)
37 :
38 : typedef fd_gossip_peer_addr_t fd_repair_peer_addr_t; /* repair addresses reuse the gossip peer address type */
39 :
40 : /* Hash a hash value: returns element 0 of the key's ul array XORed with seed. Used as MAP_KEY_HASH of fd_active_table. */
41 : FD_FN_PURE static inline
42 0 : ulong fd_hash_hash( const fd_hash_t * key, ulong seed ) {
43 0 : return key->ul[0] ^ seed;
44 0 : }
45 :
46 :
47 : /* Test if two addresses are equal by comparing their `l` members. The static assert checks that the whole address is exactly sizeof(ulong) bytes. Returns 1 if equal, 0 otherwise. */
48 : FD_FN_PURE static inline int
49 0 : fd_repair_peer_addr_eq( const fd_repair_peer_addr_t * key1, const fd_repair_peer_addr_t * key2 ) {
50 0 : FD_STATIC_ASSERT(sizeof(fd_repair_peer_addr_t) == sizeof(ulong),"messed up size");
51 0 : return key1->l == key2->l;
52 0 : }
53 :
54 : /* Hash an address: adds seed and a fixed constant to the address's `l` member, then multiplies the sum by a second fixed constant. */
55 : FD_FN_PURE static inline ulong
56 0 : fd_repair_peer_addr_hash( const fd_repair_peer_addr_t * key, ulong seed ) {
57 0 : FD_STATIC_ASSERT(sizeof(fd_repair_peer_addr_t) == sizeof(ulong),"messed up size");
58 0 : return (key->l + seed + 7242237688154252699UL)*9540121337UL;
59 0 : }
60 :
61 : /* Efficiently copy an address: a single assignment of the `l` member from keys (source) to keyd (destination); the static assert checks that `l` covers the whole address. */
62 : static inline void
63 0 : fd_repair_peer_addr_copy( fd_repair_peer_addr_t * keyd, const fd_repair_peer_addr_t * keys ) {
64 0 : FD_STATIC_ASSERT(sizeof(fd_repair_peer_addr_t) == sizeof(ulong),"messed up size");
65 0 : keyd->l = keys->l;
66 0 : }
67 :
68 : typedef uint fd_repair_nonce_t; /* nonce type; fd_repair_t keeps oldest/current/next nonce values of this type */
69 :
70 : /* Active table element. This table is all validators that we are
71 : asking for repairs. */
72 : struct fd_active_elem {
73 : fd_pubkey_t key; /* Public identifier and map key */
74 : ulong next; /* used internally by fd_map_giant */
75 :
76 : fd_repair_peer_addr_t addr;
77 : // Might be worth keeping these fields, but currently response rate is pretty high.
78 : // latency could be a useful metric to keep track of.
79 : ulong avg_reqs; /* Moving average of the number of requests */
80 : ulong avg_reps; /* Moving average of the number of responses (this comment used to duplicate the avg_reqs one; presumably "reps" = replies -- confirm) */
81 : long avg_lat; /* Moving average of response latency */
82 : ulong stake;
83 : };
84 : /* Active table: fd_map_giant instantiation named fd_active_table, storing fd_active_elem_t keyed by fd_pubkey_t (keys compared byte-wise with memcmp, hashed with fd_hash_hash) */
85 : typedef struct fd_active_elem fd_active_elem_t;
86 : #define MAP_NAME fd_active_table
87 : #define MAP_KEY_T fd_pubkey_t
88 0 : #define MAP_KEY_EQ(a,b) (0==memcmp( (a),(b),sizeof(fd_pubkey_t) ))
89 0 : #define MAP_KEY_HASH fd_hash_hash
90 0 : #define MAP_T fd_active_elem_t
91 : #include "../../util/tmpl/fd_map_giant.c"
92 :
93 : enum fd_needed_elem_type { /* kind of repair request; stored in fd_inflight_key and passed to fd_repair_construct_request_protocol */
94 : fd_needed_window_index, fd_needed_highest_window_index, fd_needed_orphan
95 : };
96 :
97 : struct fd_inflight_key { /* key of the inflight table: the (type, slot, shred_index) triple compared by fd_inflight_eq */
98 : enum fd_needed_elem_type type;
99 : ulong slot;
100 : uint shred_index;
101 : };
102 : typedef struct fd_inflight_key fd_inflight_key_t;
103 :
104 : struct fd_inflight_elem { /* element type (MAP_T) of fd_inflight_table */
105 : fd_inflight_key_t key; /* map key (MAP_KEY_T of fd_inflight_table) */
106 : long last_send_time; /* by name, time of the most recent send for this key -- units/usage not visible here, confirm */
107 : uint req_cnt; /* by name, a request counter for this key -- confirm against users */
108 : ulong next; /* presumably used internally by fd_map_giant, as in fd_active_elem -- confirm */
109 : };
110 : typedef struct fd_inflight_elem fd_inflight_elem_t;
111 :
112 : FD_FN_PURE static inline int /* key equality for fd_inflight_table: 1 iff type, slot and shred_index all match, else 0 */
113 0 : fd_inflight_eq( const fd_inflight_key_t * key1, const fd_inflight_key_t * key2 ) {
114 0 : return (key1->type == key2->type) &&
115 0 : (key1->slot == key2->slot) &&
116 0 : (key1->shred_index == key2->shred_index);
117 0 : }
118 :
119 : FD_FN_PURE static inline ulong /* key hash for fd_inflight_table: combines slot, seed and shred_index; `type` does not feed the hash, so keys differing only in type hash alike (keys equal under fd_inflight_eq still hash equally) */
120 0 : fd_inflight_hash( const fd_inflight_key_t * key, ulong seed ) {
121 0 : return (key->slot + seed)*9540121337UL + key->shred_index*131U;
122 0 : }
123 :
124 : static inline void /* key copy for fd_inflight_table: plain struct assignment from keys (source) into keyd (destination) */
125 0 : fd_inflight_copy( fd_inflight_key_t * keyd, const fd_inflight_key_t * keys ) {
126 0 : *keyd = *keys;
127 0 : }
128 :
129 : #define MAP_NAME fd_inflight_table /* fd_map_giant instantiation: fd_inflight_elem_t elements keyed by fd_inflight_key_t, using the fd_inflight_eq/hash/copy helpers */
130 : #define MAP_KEY_T fd_inflight_key_t
131 0 : #define MAP_KEY_EQ fd_inflight_eq
132 0 : #define MAP_KEY_HASH fd_inflight_hash
133 0 : #define MAP_KEY_COPY fd_inflight_copy
134 0 : #define MAP_T fd_inflight_elem_t
135 : #include "../../util/tmpl/fd_map_giant.c"
136 :
137 : FD_FN_PURE static inline int /* nonce equality: 1 if the two pointed-to nonces are equal, else 0 */
138 0 : fd_repair_nonce_eq( const fd_repair_nonce_t * key1, const fd_repair_nonce_t * key2 ) {
139 0 : return *key1 == *key2;
140 0 : }
141 :
142 : FD_FN_PURE static inline ulong /* nonce hash: adds seed and a fixed constant to the nonce, then multiplies by a second fixed constant (same constants as fd_repair_peer_addr_hash) */
143 0 : fd_repair_nonce_hash( const fd_repair_nonce_t * key, ulong seed ) {
144 0 : return (*key + seed + 7242237688154252699UL)*9540121337UL;
145 0 : }
146 :
147 : static inline void /* nonce copy: assigns the nonce pointed to by keys (source) to *keyd (destination) */
148 0 : fd_repair_nonce_copy( fd_repair_nonce_t * keyd, const fd_repair_nonce_t * keys ) {
149 0 : *keyd = *keys;
150 0 : }
151 :
152 : struct fd_pinged_elem { /* element type (MAP_T) of fd_pinged_table */
153 : fd_repair_peer_addr_t key; /* map key: a peer address (MAP_KEY_T of fd_pinged_table) */
154 : ulong next; /* presumably used internally by fd_map_giant, as in fd_active_elem -- confirm */
155 : fd_pubkey_t id; /* by name, identity associated with the address -- confirm against users */
156 : fd_hash_t token; /* by name, a ping token -- how it is produced/checked is not visible here */
157 : int good; /* by name, a status flag; its exact meaning is not visible in this header -- confirm */
158 : };
159 : typedef struct fd_pinged_elem fd_pinged_elem_t;
160 : #define MAP_NAME fd_pinged_table /* fd_map_giant instantiation: fd_pinged_elem_t elements keyed by fd_repair_peer_addr_t, using the fd_repair_peer_addr_eq/hash/copy helpers */
161 : #define MAP_KEY_T fd_repair_peer_addr_t
162 : #define MAP_KEY_EQ fd_repair_peer_addr_eq
163 : #define MAP_KEY_HASH fd_repair_peer_addr_hash
164 : #define MAP_KEY_COPY fd_repair_peer_addr_copy
165 0 : #define MAP_T fd_pinged_elem_t
166 : #include "../../util/tmpl/fd_map_giant.c"
167 :
168 : struct fd_peer { /* entry type of fd_repair_t's peers array: a public key paired with an IPv4 address/port value */
169 : fd_pubkey_t key;
170 : fd_ip4_port_t ip4;
171 : };
172 : typedef struct fd_peer fd_peer_t;
173 : /* Repair metrics: ulong counters, two per-type counter arrays sized by the FD_METRICS_ENUM_*_CNT constants, and two histograms. Embedded in fd_repair_t as `metrics`; fd_repair_get_metrics returns a pointer to this type. */
174 : struct fd_repair_metrics {
175 : ulong recv_clnt_pkt;
176 : ulong recv_serv_pkt;
177 : ulong recv_serv_corrupt_pkt;
178 : ulong recv_serv_invalid_signature;
179 : ulong recv_serv_full_ping_table;
180 : ulong recv_serv_pkt_types[FD_METRICS_ENUM_REPAIR_SERV_PKT_TYPES_CNT];
181 : ulong recv_pkt_corrupted_msg;
182 : ulong send_pkt_cnt;
183 : ulong sent_pkt_types[FD_METRICS_ENUM_REPAIR_SENT_REQUEST_TYPES_CNT];
184 : fd_histf_t store_link_wait[ 1 ];
185 : fd_histf_t store_link_work[ 1 ];
186 : };
187 : typedef struct fd_repair_metrics fd_repair_metrics_t;
188 : #define FD_REPAIR_METRICS_FOOTPRINT ( sizeof( fd_repair_metrics_t ) )
189 : /* Global data for repair service */
190 : struct fd_repair {
191 : /* Current time in nanosecs */
192 : long now;
193 : /* My public/private key */
194 : fd_pubkey_t * public_key;
195 : uchar * private_key;
196 : /* My repair addresses */
197 : fd_repair_peer_addr_t service_addr;
198 : fd_repair_peer_addr_t intake_addr;
199 : /* Opaque argument pointer. NOTE(review): this comment used to describe "the function used to send raw packets on the network", but only a void * argument is stored here -- confirm intended use */
200 : void * fun_arg;
201 : /* Table of validators that we are asking for repairs (fd_active_elem_t entries; their map key is the validator public key, not the repair address) */
202 : fd_active_elem_t * actives;
203 :
204 : /* TODO remove, along with good peer cache file */
205 : fd_pubkey_t actives_sticky[FD_REPAIR_STICKY_MAX]; /* cache of chosen repair peer samples */
206 : ulong actives_sticky_cnt;
207 : ulong actives_random_seed;
208 :
209 : fd_peer_t peers[ FD_ACTIVE_KEY_MAX ];
210 : ulong peer_cnt; /* number of peers in the peers array */
211 : ulong peer_idx; /* by name, an index into the peers array. NOTE(review): this comment used to read "max number of peers", but the array capacity is FD_ACTIVE_KEY_MAX -- confirm intended meaning */
212 :
213 : /* Duplicate request detection table */
214 : fd_inflight_elem_t * dupdetect;
215 :
216 : /* Nonce values. NOTE(review): this comment used to read "Table of needed shreds", but no such table is declared in this struct */
217 : fd_repair_nonce_t oldest_nonce;
218 : fd_repair_nonce_t current_nonce;
219 : fd_repair_nonce_t next_nonce;
220 : /* Table of validator clients that we have pinged */
221 : fd_pinged_elem_t * pinged;
222 : /* Last batch of sends */
223 : long last_sends;
224 : /* Last statistics decay */
225 : long last_decay;
226 : /* Last statistics printout */
227 : long last_print;
228 : /* Last write to good peer cache file */
229 : long last_good_peer_cache_file_write;
230 : /* Random number generator */
231 : fd_rng_t rng[1];
232 : /* RNG seed */
233 : ulong seed;
234 : /* Stake weights */
235 : ulong stake_weights_cnt;
236 : fd_vote_stake_weight_t * stake_weights;
237 : ulong stake_weights_temp_cnt;
238 : fd_vote_stake_weight_t * stake_weights_temp;
239 : /* File descriptor (not a path) of the file where we write the cache of known good repair peers, to make cold booting faster */
240 : int good_peer_cache_file_fd;
241 : /* Metrics */
242 : fd_repair_metrics_t metrics;
243 : };
244 : typedef struct fd_repair fd_repair_t;
245 :
246 : FD_FN_CONST static inline ulong /* alignment constant for the repair object: always 128; also the alignment passed to FD_LAYOUT_FINI in fd_repair_footprint */
247 0 : fd_repair_align ( void ) { return 128UL; }
248 :
249 : FD_FN_CONST static inline ulong /* footprint of the repair object, built with FD_LAYOUT_INIT/APPEND/FINI from: fd_repair_t itself, the active, inflight and pinged tables at their max capacities, and two stake weight arrays */
250 0 : fd_repair_footprint( void ) {
251 0 : ulong l = FD_LAYOUT_INIT;
252 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_repair_t), sizeof(fd_repair_t) );
253 0 : l = FD_LAYOUT_APPEND( l, fd_active_table_align(), fd_active_table_footprint(FD_ACTIVE_KEY_MAX) );
254 0 : l = FD_LAYOUT_APPEND( l, fd_inflight_table_align(), fd_inflight_table_footprint(FD_NEEDED_KEY_MAX) );
255 0 : l = FD_LAYOUT_APPEND( l, fd_pinged_table_align(), fd_pinged_table_footprint(FD_REPAIR_PINGED_MAX) );
256 : /* regular and temp stake weights: two arrays of FD_STAKE_WEIGHTS_MAX entries each */
257 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_vote_stake_weight_t), FD_STAKE_WEIGHTS_MAX * sizeof(fd_vote_stake_weight_t) );
258 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_vote_stake_weight_t), FD_STAKE_WEIGHTS_MAX * sizeof(fd_vote_stake_weight_t) );
259 0 : return FD_LAYOUT_FINI(l, fd_repair_align() );
260 0 : }
261 :
262 : /* Global state of repair protocol: object lifecycle declarations (new/join/leave/delete). fd_repair_align and fd_repair_footprint are also defined as static inline functions earlier in this header. */
263 : FD_FN_CONST ulong fd_repair_align ( void );
264 : FD_FN_CONST ulong fd_repair_footprint( void );
265 : void * fd_repair_new ( void * shmem, ulong seed );
266 : fd_repair_t * fd_repair_join ( void * shmap );
267 : void * fd_repair_leave ( fd_repair_t * join );
268 : void * fd_repair_delete ( void * shmap );
269 :
270 : struct fd_repair_config { /* configuration passed to fd_repair_set_config; each field has a same-named, same-typed field in fd_repair_t */
271 : fd_pubkey_t * public_key;
272 : uchar * private_key;
273 : fd_repair_peer_addr_t service_addr;
274 : fd_repair_peer_addr_t intake_addr;
275 : int good_peer_cache_file_fd;
276 : };
277 : typedef struct fd_repair_config fd_repair_config_t;
278 :
279 : /* Initialize the repair data structure */
280 : int fd_repair_set_config( fd_repair_t * glob, const fd_repair_config_t * config );
281 :
282 : /* Update the binding addr */
283 : int fd_repair_update_addr( fd_repair_t * glob, const fd_repair_peer_addr_t * intake_addr, const fd_repair_peer_addr_t * service_addr );
284 :
285 : /* Add a peer to talk to */
286 : int fd_repair_add_active_peer( fd_repair_t * glob, fd_repair_peer_addr_t const * addr, fd_pubkey_t const * id );
287 :
288 : /* Set the current protocol time in nanosecs. Call this as often as feasible. */
289 : void fd_repair_settime( fd_repair_t * glob, long ts );
290 :
291 : /* Get the current protocol time in nanosecs */
292 : long fd_repair_gettime( fd_repair_t * glob );
293 :
294 : /* Start timed events and other protocol behavior. settime MUST be called before this. */
295 : int fd_repair_start( fd_repair_t * glob );
296 :
297 : /* Dispatch timed events and other protocol behavior. This should be
298 : * called inside the main spin loop. Calling settime first is recommended. */
299 : int fd_repair_continue( fd_repair_t * glob );
300 :
301 : int fd_repair_inflight_remove( fd_repair_t * glob, /* NOTE(review): undocumented; by name, removes the inflight entry for (slot, shred_index) -- definition is not in this header, confirm */
302 : ulong slot,
303 : uint shred_index );
304 :
305 : /* Register a request for a shred */
306 : int fd_repair_need_window_index( fd_repair_t * glob, ulong slot, uint shred_index );
307 :
308 : int fd_repair_need_highest_window_index( fd_repair_t * glob, ulong slot, uint shred_index );
309 :
310 : int fd_repair_need_orphan( fd_repair_t * glob, ulong slot );
311 :
312 : int
313 : fd_repair_construct_request_protocol( fd_repair_t * glob,
314 : fd_repair_protocol_t * protocol,
315 : enum fd_needed_elem_type type,
316 : ulong slot,
317 : uint shred_index,
318 : fd_pubkey_t const * recipient,
319 : uint nonce,
320 : long now );
321 :
322 : void fd_repair_add_sticky( fd_repair_t * glob, fd_pubkey_t const * id );
323 :
324 : void fd_repair_set_stake_weights_init( fd_repair_t * repair,
325 : fd_vote_stake_weight_t const * stake_weights,
326 : ulong stake_weights_cnt );
327 :
328 : void fd_repair_set_stake_weights_fini( fd_repair_t * repair );
329 :
330 : fd_repair_metrics_t *
331 : fd_repair_get_metrics( fd_repair_t * repair );
332 :
333 :
334 : #endif /* HEADER_fd_src_flamenco_repair_fd_repair_h */
|