Line data Source code
1 : #ifndef HEADER_fd_src_disco_shred_fd_fec_resolver_h
2 : #define HEADER_fd_src_disco_shred_fd_fec_resolver_h
3 : #include "../../ballet/shred/fd_fec_set.h"
4 : #include "../../ballet/bmtree/fd_bmtree.h"
5 : #include "../../ballet/ed25519/fd_ed25519.h"
6 :
7 : /* This header defines several methods for building and validating FEC
8 : sets from received shreds. It's designed just for use by the shred
9 : tile, which is why it's in disco/shred.
10 :
11 : The primary complication in the interface comes from lifetimes.
12 : Buffers returned by the networking layer are typically ephemeral and
13 : we need to hold onto the data until we've finished the FEC set, so we
14 : need at least one memcpy. Once we complete the FEC set, the result
15 : needs to go out on an mcache/dcache pair and on the network, which
16 : also have different lifetime requirements. The FEC resolver goes out
17 : of its way to use memory in a way that doesn't require a second
18 : memcpy once the FEC set is complete. To that end, the FEC resolver
19 : makes two promises:
20 : 1. Once memory has been used for a specific FEC set, it will not be
21 : reused for a different FEC set until at least partial_depth other
22 : distinct FEC sets have been returned in the out_fec_set field of
23 : fd_fec_resolver_add_shred or fd_fec_resolver_force_complete.
24 : 2. Once a FEC set is complete (specified with COMPLETES), the
25 : associated memory will not be reused for a different FEC set until
26 : at least complete_depth other distinct FEC sets have been returned
27 : from calls to fd_fec_resolver_add_shred or
28 : fd_fec_resolver_force_complete that return SHRED_COMPLETES.
29 :
30 : This is implemented using a freelist with an insertion point in the
31 : middle (which can also be seen as two chained freelists):
32 : ------------------------
33 : | In progress FEC |--------- if completed -
34 : | sets (<=depth) | |
35 : | |--- if spilled ---- |
36 : ------------------------ | |
37 : ^ | |
38 : | V |
39 : -------- | |
40 : | | - | V
41 : | Free | \ | |
42 : | | >=partial_depth | |
43 : | FEC | / | |
44 : | sets | | | |
45 : | | _ | |
46 : -------- V |
47 : ^ ^ | |
48 : | |------<---------------<----------| |
49 : | |
50 : -------- |
51 : | | - |
52 : | Comp | \ |
53 : |leted | complete_depth |
54 : | | / V
55 : | FEC | | |
56 : | sets | | |
57 : | | - |
58 : -------- |
59 : ^ |
60 : |-------------------<----------------------
61 :
62 : When a shred arrives for a new FEC set, we pull an entry from the
63 : head of the free FEC set queue. If that would result in more than
64 : depth sets in progress, we spill the oldest in progress set, and
65 : insert it at the tail of the free set queue. When we complete a set,
66 : we remove it from the in progress set, add it to the tail of the
67 : completed FEC set queue, and move one element from the head of the
68 : completed queue to the tail of the free queue.
69 :
70 : Since the completed queue only advances when an FEC set is completed,
71 : any complete FEC set stays in that queue for at least complete_depth
72 : completions.
73 :
74 : It might seems like this system is overkill and one combined longer
75 : freelist would suffice, but that's incorrect. Consider the following
76 : scenario: Suppose we've just completed FEC set A, so we return it
77 : with COMPLETES and then we put it at the end of the free list. Now
78 : we receive a shred for a new FEC set, so we take memory from the head
79 : of the free list. That happens many times, but we never receive
80 : enough shreds for any FEC set to complete one. Eventually, we churn
81 : through the whole singular freelist with spilling until the memory we
82 : used for A gets to the head of the freelist. Finally we receive
83 : enough shreds to complete the FEC set, so we return A with COMPLETES.
84 : Thus, from the perspective of a consumer that only cares about
85 : completed FEC sets, we've returned the same piece of memory twice in
86 : a row. */
87 :
88 : /* Forward declare opaque handle. It has a lot of types we don't
89 : necessarily want to bring into the includer */
90 0 : #define FD_FEC_RESOLVER_ALIGN (128UL)
91 : struct fd_fec_resolver;
92 : typedef struct fd_fec_resolver fd_fec_resolver_t;
93 :
94 642 : #define SHRED_CNT_NOT_SET (UINT_MAX/2U)
95 :
96 : /* fd_fec_resolver_sign_fn: used to sign shreds that require a
97 : retransmitter signature. */
98 : typedef void (fd_fec_resolver_sign_fn)( void * ctx, uchar * sig, uchar const * merkle_root );
99 :
100 : FD_PROTOTYPES_BEGIN
101 : /* fd_fec_resolver_footprint returns the required footprint (in bytes as
102 : always) required to create an FEC set resolver that can keep track of
103 : `depth` in progress FEC sets, will not reuse FEC sets for at least
104 : partial_depth shreds or for at least complete_depth complete FEC sets
105 : (see above for more information). Additionally, the FEC resolver
106 : remembers done_depth FEC sets to recognize duplicates vs. new FEC
107 : sets. All depths must positive.
108 :
109 : fd_fec_resolver_alignment returns the required alignment of a region
110 : of memory for it to be used as a FEC resolver. */
111 : FD_FN_PURE ulong fd_fec_resolver_footprint( ulong depth, ulong partial_depth, ulong complete_depth, ulong done_depth );
112 : FD_FN_CONST ulong fd_fec_resolver_align ( void );
113 :
114 : /* fd_fec_resolver_new formats a region of memory as a FEC resolver.
115 : shmem must have the required alignment and footprint. signer is a
116 : function pointer used to sign any shreds that require a retransmitter
117 : signature, and sign_ctx is an opaque pointer passed as the first
118 : argument to the function. It is okay to pass NULL for signer, in
119 : which case, retransmission signatures will just be zeroed and
120 : sign_ctx will be ignored. depth, partial_depth, complete_depth, and
121 : done_depth are as defined above and must be positive. sets is a
122 : pointer to the first of depth+partial_depth+complete_depth FEC sets
123 : that this resolver will take ownership of. The FEC resolver retains
124 : a write interest in these FEC sets and the shreds they point to until
125 : the resolver is deleted. These FEC sets and the memory for the
126 : shreds they point to are the only values that will be returned in the
127 : output parameters of _{add_shred, force_completes}. The FEC resolver
128 : will reject any shreds with a shred version that does not match the
129 : value provided for expected_shred_version. Shred versions are always
130 : non-zero, so expected_shred_version must be non-zero. The FEC
131 : resolver will also reject any shred that seems to be part of a block
132 : containing more than max_shred_idx data or parity shreds. Since
133 : shred_idx is a uint, it doesn't really make sense to have
134 : max_shred_idx > UINT_MAX, and max_shred_idx==0 rejects all shreds.
135 : Returns shmem on success and NULL on failure (logs details). */
136 : void *
137 : fd_fec_resolver_new( void * shmem,
138 : fd_fec_resolver_sign_fn * signer,
139 : void * sign_ctx,
140 : ulong depth,
141 : ulong partial_depth,
142 : ulong complete_depth,
143 : ulong done_depth,
144 : fd_fec_set_t * sets,
145 : ulong max_shred_idx );
146 :
147 : fd_fec_resolver_t * fd_fec_resolver_join( void * shmem );
148 :
149 : void
150 : fd_fec_resolver_set_shred_version( fd_fec_resolver_t * resolver,
151 : ushort expected_shred_version );
152 :
153 :
154 39 : #define FD_FEC_RESOLVER_SHRED_REJECTED (-2)
155 1194 : #define FD_FEC_RESOLVER_SHRED_IGNORED (-1)
156 8667 : #define FD_FEC_RESOLVER_SHRED_OKAY ( 0)
157 273 : #define FD_FEC_RESOLVER_SHRED_COMPLETES ( 1)
158 :
159 : /* Return values + RETVAL_OFF are in [0, RETVAL_CNT) */
160 : #define FD_FEC_RESOLVER_ADD_SHRED_RETVAL_CNT 4
161 0 : #define FD_FEC_RESOLVER_ADD_SHRED_RETVAL_OFF 2
162 :
163 : struct fd_fec_resolver_spilled {
164 : ulong slot;
165 : uint fec_set_idx;
166 : uint max_dshred_idx; /* position in FEC set, in [0, FD_REEDSOL_DATA_SHREDS_MAX) */
167 : };
168 : typedef struct fd_fec_resolver_spilled fd_fec_resolver_spilled_t;
169 :
170 : /* fd_fec_resolver_add_shred notifies the FEC resolver of a newly
171 : received shred. The FEC resolver validates the shred and copies it
172 : into its own storage. resolver is a local join of an FEC resolver.
173 : shred is a pointer to the new shred that should be added. shred_sz
174 : is the size of the shred in bytes.
175 :
176 : On success ie. SHRED_{OKAY,COMPLETES}, a pointer to the fd_fec_set_t
177 : structure representing the FEC set of which the shred is a part will
178 : be written to out_fec_set. Additionally, on success a pointer to a
179 : copy of shred will be written to the location pointed to by
180 : out_shred. See the long explanation above about the lifetimes of
181 : these pointers. Finally, on success the merkle root of the shred
182 : (reconstructed from the merkle proof) will be written to
183 : out_merkle_root. Unlike out_{fec_set,shred}, caller owns and
184 : provides the memory for out_merkle_root. If the out_merkle_root
185 : pointer is NULL, the argument will be ignored and merkle root will
186 : not be written.
187 :
188 : If the shred fails validation for any reason, returns SHRED_REJECTED
189 : and does not write to out_{fec_set,shred,merkle_root}. If the shred
190 : is a duplicate of a shred that has already been received (ie. a shred
191 : with the same index but a different payload), returns SHRED_IGNORED
192 : does not write to out_{fec_set,shred,merkle_root}.
193 :
194 : Note that only light validation is performed on a duplicate shred, so
195 : a shred that is actually invalid but looks like a duplicate of a
196 : previously received valid shred may be considered SHRED_IGNORED
197 : instead of SHRED_REJECTED.
198 :
199 : This function returns SHRED_COMPLETES when the received shred is the
200 : last one and completes the FEC set. In this case, the function
201 : populates any missing shreds in the FEC set stored in out_fec_set.
202 :
203 : Regardless of success/failure, if an incomplete FEC set was evicted
204 : from the current map during this add_shred call, the metadata of the
205 : evicted FEC set will be written to out_spilled_fec_set. The FEC set
206 : metadata includes slot, fec_set_idx, and also highest data shred
207 : index received thus far in this FEC set, in the range
208 : [0, FD_REEDSOL_DATA_SHREDS_MAX). If no data shreds have been
209 : received yet (i.e., only parity shreds have been received),
210 : max_dshred_idx will be FD_SHRED_BLK_MAX. If no FEC set was evicted,
211 : out_spilled_fec_set will remain unmodified. Similar to
212 : out_merkle_root, the caller owns and provides the memory for
213 : out_spilled_fec_set. If the out_spilled_fec_set pointer is NULL, the
214 : argument will be ignored and the evicted FEC set metadata will not be
215 : written. */
216 :
217 : int
218 : fd_fec_resolver_add_shred( fd_fec_resolver_t * resolver,
219 : fd_shred_t const * shred,
220 : ulong shred_sz,
221 : uchar const * leader_pubkey,
222 : fd_fec_set_t const * * out_fec_set,
223 : fd_shred_t const * * out_shred,
224 : fd_bmtree_node_t * out_merkle_root,
225 : fd_fec_resolver_spilled_t * out_spilled_fec_set );
226 :
227 :
228 : /* fd_fec_resolver_done_contains returns 1 if the FEC with signature
229 : lives in the done_map, and thus means it has been completed. Returns
230 : 0 otherwise. */
231 : int
232 : fd_fec_resolver_done_contains( fd_fec_resolver_t * resolver,
233 : fd_ed25519_sig_t const * signature );
234 :
235 : /* fd_fec_resolver_shred_query returns the data shred in the FEC set
236 : with signature at shred_idx. The return shred is copied to the region
237 : of memory pointed to by out_shred, and will copy only up to
238 : FD_SHRED_MIN_SZ bytes. Returns FD_FEC_RESOLVER_SHRED_REJECTED if the
239 : FEC set does not live in the curr_map, and in this case, the user
240 : should ignore the out_shred. If the FEC set with signature lives in
241 : the curr_map (i.e., is an in-progress FEC set), then the data shred
242 : at shred_idx is copied to out_shred and FD_FEC_RESOLVER_SHRED_OKAY is
243 : returned. Note: no validation on shred idx bounds is performed, so it
244 : is up to the user to ensure that provided shred_idx is between [0,
245 : data_cnt). No validation that the shred at shred_idx has been
246 : received is performed either. If either of these are not satisfied,
247 : upon return the value of out_shred[i] for i in [0, FD_SHRED_MIN_SZ)
248 : is undefined.
249 :
250 : The use case for this function is solely for the force completion
251 : API, which requires the last shred in a FEC set. This function should
252 : be removed at the time when force completion is removed. */
253 : int
254 : fd_fec_resolver_shred_query( fd_fec_resolver_t * resolver,
255 : fd_ed25519_sig_t const * signature,
256 : uint shred_idx,
257 : uchar * out_shred );
258 :
259 : /* fd_fec_resolver_force_complete forces completion of a partial FEC set
260 : in the FEC resolver.
261 :
262 : API is similar to add_shred. last_shred is what the caller has
263 : determined to be the last shred in the FEC set. out_fec_set is set
264 : to a pointer to the complete FEC set on SHRED_COMPLETES. Similar to
265 : add_shred, see the long explanation at the top of this file for
266 : details on the lifetime of out_fec_set. out_merkle_root if non-NULL
267 : will contain a copy of the Merkle root of the FEC set on success.
268 :
269 : Returns SHRED_COMPLETES when last_shred validates successfully with
270 : the in-progress FEC set, SHRED_IGNORED if the FEC set containing
271 : last_shred has already been completed (done) and SHRED_REJECTED when
272 : the last_shred provided is obviously invalid or the in-progress FEC
273 : does not validate.
274 :
275 : This function is a temporary measure to address a current limitation
276 : in the Repair protocol where it does not support requesting coding
277 : shreds. FEC resolver requires at least one coding shred to complete,
278 : so this function is intended to be called when the caller knows FEC
279 : set resolver has already received all the data shreds, but hasn't
280 : gotten any coding shreds, but the caller has no way to recover the
281 : coding shreds and make forward progress using the data shreds it does
282 : already have available.
283 :
284 : Note that forcing completion greatly reduces the amount of validation
285 : performed on the FEC set. It only checks that the data shreds are
286 : consistent with one another. If validation of the FEC set fails when
287 : completing (other than issues with the last shred that are obviously
288 : wrong eg. shred_idx > FD_REEDSOL_DATA_SHREDS_MAX), then the function,
289 : similar to add_shred, will dump the in-progress FEC and add it to the
290 : free list. Caller should account for this ensure they only
291 : force_complete when they are certain last shred is the in fact the
292 : last shred, or the consequence is the entire FEC might be incorrectly
293 : discarded too early.
294 :
295 : The last_shred is used to derive the data_shred_cnt which is
296 : otherwise only available after receiving a coding shred. It is an
297 : error to force completion of a FEC set that has already received at
298 : least one parity shred. */
299 :
300 : int
301 : fd_fec_resolver_force_complete( fd_fec_resolver_t * resolver,
302 : fd_shred_t const * last_shred,
303 : fd_fec_set_t const ** out_fec_set,
304 : fd_bmtree_node_t * out_merkle_root );
305 :
306 : void * fd_fec_resolver_leave( fd_fec_resolver_t * resolver );
307 : void * fd_fec_resolver_delete( void * shmem );
308 :
309 : FD_PROTOTYPES_END
310 : #endif /* HEADER_fd_src_disco_shred_fd_fec_resolver_h */
|