Line data Source code
1 : #ifndef HEADER_fd_src_funk_fd_funk_rec_h
2 : #define HEADER_fd_src_funk_fd_funk_rec_h
3 :
4 : /* This provides APIs for managing funk records. It is generally not
5 : meant to be included directly. Use fd_funk.h instead.
6 :
7 : The following APIs are thread safe and can be interleaved arbirarily
8 : across threads:
9 : fd_funk_rec_query_try
10 : fd_funk_rec_query_test
11 : fd_funk_rec_query_try_global
12 : fd_funk_rec_prepare
13 : fd_funk_rec_publish
14 : fd_funk_rec_cancel
15 : fd_funk_rec_remove
16 : */
17 :
18 : #include "fd_funk_txn.h" /* Includes fd_funk_base.h */
19 :
20 : /* FD_FUNK_REC_{ALIGN,FOOTPRINT} describe the alignment and footprint of
21 : a fd_funk_rec_t. ALIGN will be a power of 2, footprint will be a
22 : multiple of align. These are provided to facilitate compile time
23 : declarations. */
24 :
25 : #define FD_FUNK_REC_ALIGN (64UL)
26 :
27 : /* FD_FUNK_REC_FLAG_* are flags that can be bit-ored together to specify
28 : how records are to be interpreted. The 5 most significant bytes of a
29 : rec's flag are reserved to be used in conjunction with the ERASE flag.
30 :
31 : - ERASE indicates a record in an in-preparation transaction should be
32 : erased if and when the in-preparation transaction is published. If
33 : set on a published record, it serves as a tombstone.
34 : If set, there will be no value resources used by this record. */
35 :
36 18799170 : #define FD_FUNK_REC_FLAG_ERASE (1UL<<0)
37 :
38 : /* FD_FUNK_REC_IDX_NULL gives the map record idx value used to represent
39 : NULL. This value also set a limit on how large rec_max can be. */
40 :
41 1148709938 : #define FD_FUNK_REC_IDX_NULL (UINT_MAX)
42 :
43 : /* A fd_funk_rec_t describes a funk record. */
44 :
45 : struct __attribute__((aligned(FD_FUNK_REC_ALIGN))) fd_funk_rec {
46 :
47 : /* These fields are managed by the funk's rec_map */
48 :
49 : fd_funk_xid_key_pair_t pair; /* Transaction id and record key pair */
50 : uint map_next; /* Internal use by map */
51 : ulong map_hash; /* Internal use by map */
52 :
53 : /* These fields are managed by funk. TODO: Consider using record
54 : index compression here (much more debatable than in txn itself). */
55 :
56 : uint prev_idx; /* Record map index of previous record in its transaction */
57 : uint next_idx; /* Record map index of next record in its transaction */
58 :
59 : /* UNUSED: reserved for future use by the Funk LRU. Do not remove or modify. */
60 : uint accounts_lru_prev_idx; /* Record map idx of the next record in the accounts LRU dlist */
61 : uint accounts_lru_next_idx; /* Record map idx of the prev record in the accounts LRU dlist */
62 :
63 : uint txn_cidx; /* Compressed transaction map index (or compressed FD_FUNK_TXN_IDX if this is in the last published) */
64 : uint tag; /* Internal use only */
65 : ulong flags; /* Flags that indicate how to interpret a record */
66 :
67 : /* Note: use of uint here requires FD_FUNK_REC_VAL_MAX to be at most
68 : UINT_MAX. */
69 :
70 : uint val_sz; /* Num bytes in record value, in [0,val_max] */
71 : uint val_max; /* Max byte in record value, in [0,FD_FUNK_REC_VAL_MAX], 0 if erase flag set or val_gaddr is 0 */
72 : ulong val_gaddr; /* Wksp gaddr on record value if any, 0 if erase flag set or val_max is 0
73 : If non-zero, the region [val_gaddr,val_gaddr+val_max) will be a current fd_alloc allocation (such that it is
74 : has tag wksp_tag) and the owner of the region will be the record. The allocator is
75 : fd_funk_alloc(). IMPORTANT! HAS NO GUARANTEED ALIGNMENT! */
76 :
77 : /* Padding to FD_FUNK_REC_ALIGN here */
78 : };
79 :
80 : typedef struct fd_funk_rec fd_funk_rec_t;
81 :
82 : FD_STATIC_ASSERT( sizeof(fd_funk_rec_t) == 2U*FD_FUNK_REC_ALIGN, record size is wrong );
83 :
84 : /* fd_funk_rec_map allows for indexing records by their (xid,key) pair.
85 : It is used to store all records of the last published transaction and
86 : the records being updated for a transaction that is in-preparation.
87 : Published records are stored under the pair (root,key). (This is
88 : done so that publishing a transaction doesn't require updating all
89 : transaction id of all the records that were not updated by the
90 : publish.) */
91 :
92 : #define POOL_NAME fd_funk_rec_pool
93 : #define POOL_ELE_T fd_funk_rec_t
94 : #define POOL_IDX_T uint
95 : #define POOL_NEXT map_next
96 : #define POOL_IMPL_STYLE 1
97 : #include "../util/tmpl/fd_pool_para.c"
98 :
99 : #define MAP_NAME fd_funk_rec_map
100 112088972 : #define MAP_ELE_T fd_funk_rec_t
101 : #define MAP_KEY_T fd_funk_xid_key_pair_t
102 : #define MAP_KEY pair
103 785325545 : #define MAP_KEY_EQ(k0,k1) fd_funk_xid_key_pair_eq((k0),(k1))
104 523955213 : #define MAP_KEY_HASH(k0,seed) fd_funk_xid_key_pair_hash((k0),(seed))
105 : #define MAP_IDX_T uint
106 112088972 : #define MAP_NEXT map_next
107 : #define MAP_MEMO map_hash
108 : #define MAP_MAGIC (0xf173da2ce77ecdb0UL) /* Firedancer rec db version 0 */
109 : #define MAP_MEMOIZE 1
110 : #define MAP_IMPL_STYLE 1
111 : #include "../util/tmpl/fd_map_chain_para.c"
112 : #undef MAP_MEMOIZE
113 : #undef MAP_HASH
114 :
115 : typedef fd_funk_rec_map_query_t fd_funk_rec_query_t;
116 :
117 : /* fd_funk_rec_prepare_t represents a new record that has been
118 : prepared but not inserted into the map yet. See documentation for
119 : fd_funk_rec_prepare. */
120 :
121 : struct _fd_funk_rec_prepare {
122 : fd_funk_rec_t * rec;
123 : uint * rec_head_idx;
124 : uint * rec_tail_idx;
125 : uchar * txn_lock;
126 : };
127 :
128 : typedef struct _fd_funk_rec_prepare fd_funk_rec_prepare_t;
129 :
130 : FD_PROTOTYPES_BEGIN
131 :
132 : /* fd_funk_rec_idx_is_null returns 1 if idx is FD_FUNK_REC_IDX_NULL and
133 : 0 otherwise. */
134 :
135 1072505507 : FD_FN_CONST static inline int fd_funk_rec_idx_is_null( uint idx ) { return idx==FD_FUNK_REC_IDX_NULL; }
136 :
137 : /* Accessors */
138 :
139 : /* fd_funk_rec_modify attempts to modify the record corresponding to the
140 : given key in the given transaction. If the record does not exist,
141 : NULL will be returned. If the txn is NULL, the query will be done
142 : against funk's last published transaction (the root). On success,
143 : a mutable pointer to the funk record is returned.
144 :
145 : Assumes funk is a current local join (NULL returns NULL), txn is NULL
146 : or points to an in-preparation transaction in the caller's address
147 : space, key points to a record key in the caller's address space (NULL
148 : returns NULL). It is SAFE to do concurrent operations on funk with
149 : fd_funk_rec_modify.
150 :
151 : If there is contention for this record (or any records that are
152 : hashed to same chain as this record), the function will block the
153 : caller until the contention is resolved.
154 :
155 : A call to fd_funk_rec_modify must be followed by a call to
156 : fd_funk_rec_modify_publish.
157 :
158 : The query argument remembers the query for later validity testing.
159 :
160 : Important safety tips:
161 :
162 : 1. This function can encounter records that have the ERASE flag set
163 : (i.e. are tombstones of erased records). fd_funk_rec_query_try will
164 : still return the record in this case, and the application should
165 : check for the flag.
166 :
167 : 2. This function will not error if a caller attempts to modify a
168 : record from a non-current transaction (i.e. any funk transaction
169 : with a child). However, the caller should NEVER do this as it
170 : violates funk's invariants. */
171 :
172 : fd_funk_rec_t *
173 : fd_funk_rec_modify( fd_funk_t * funk,
174 : fd_funk_txn_t const * txn,
175 : fd_funk_rec_key_t const * key,
176 : fd_funk_rec_query_t * query );
177 :
178 : /* fd_funk_rec_modify_publish commits any modifications to the record
179 : done by fd_funk_rec_modify. All notes from fd_funk_rec_modify
180 : apply. Calling fd_funk_rec_modify_publish is required and is
181 : responsible for freeing the lock on the record (and the hash
182 : chain). */
183 :
184 : void
185 : fd_funk_rec_modify_publish( fd_funk_rec_query_t * query );
186 :
187 :
188 : /* fd_funk_rec_query_try queries the in-preparation transaction pointed to
189 : by txn for the record whose key matches the key pointed to by key.
190 : If txn is NULL, the query will be done for the funk's last published
191 : transaction. Returns a pointer to current record on success and NULL
192 : on failure. Reasons for failure include txn is neither NULL nor a
193 : pointer to a in-preparation transaction, key is NULL or not a record
194 : in the given transaction.
195 :
196 : The returned pointer is in the caller's address space if the
197 : return value is non-NULL.
198 :
199 : Assumes funk is a current local join (NULL returns NULL), txn is NULL
200 : or points to an in-preparation transaction in the caller's address
201 : space, key points to a record key in the caller's address space (NULL
202 : returns NULL), and no concurrent operations on funk, txn or key.
203 : funk retains no interest in key. The funk retains ownership of any
204 : returned record.
205 :
206 : The query argument remembers the query for later validity testing.
207 :
208 : This is reasonably fast O(1).
209 :
210 : Important safety tip! This function can encounter records
211 : that have the ERASE flag set (i.e. are tombstones of erased
212 : records). fd_funk_rec_query_try will still return the record in this
213 : case, and the application should check for the flag. */
214 :
215 : fd_funk_rec_t const *
216 : fd_funk_rec_query_try( fd_funk_t * funk,
217 : fd_funk_txn_t const * txn,
218 : fd_funk_rec_key_t const * key,
219 : fd_funk_rec_query_t * query );
220 :
221 : /* fd_funk_rec_query_test returns SUCCESS if a prior query still has a
222 : valid result. The coding pattern is:
223 :
224 : for(;;) {
225 : fd_funk_rec_query_t query[1];
226 : fd_funk_rec_t * rec = fd_funk_rec_query_try( funk, txn, key, query );
227 : ... Optimistically read record value ...
228 : if( fd_funk_rec_query_test( query ) == FD_FUNK_SUCCESS ) break;
229 : ... Clean up and try again ...
230 : }
231 : */
232 :
233 : int fd_funk_rec_query_test( fd_funk_rec_query_t * query );
234 :
235 : /* fd_funk_rec_query_try_global is the same as fd_funk_rec_query_try but
236 : will query txn's ancestors for key from youngest to oldest if key is
237 : not part of txn. As such, the txn of the returned record may not
238 : match txn but will be the txn of most recent ancestor with the key
239 : otherwise. *txn_out is set to the transaction where the record was
240 : found.
241 :
242 : This is reasonably fast O(in_prep_ancestor_cnt).
243 :
244 : Important safety tip! This function can encounter records
245 : that have the ERASE flag set (i.e. are tombstones of erased
246 : records). fd_funk_rec_query_try_global will return a NULL in this case
247 : but still set *txn_out to the relevant transaction. This behavior
248 : differs from fd_funk_rec_query_try.
249 :
250 : TODO: This function should be renamed to fd_funk_rec_query_try
251 : and fd_funk_rec_query_try should be renamed to
252 : fd_funk_rec_query_try_strict. */
253 : fd_funk_rec_t const *
254 : fd_funk_rec_query_try_global( fd_funk_t const * funk,
255 : fd_funk_txn_t const * txn,
256 : fd_funk_rec_key_t const * key,
257 : fd_funk_txn_t const ** txn_out,
258 : fd_funk_rec_query_t * query );
259 :
260 : /* fd_funk_rec_query_copy queries the in-preparation transaction pointed to
261 : by txn for the record whose key matches the key pointed to by key.
262 :
263 : The contents of the record are safely copied into space allocated
264 : with the valloc, and a pointer to that space is returned. If there
265 : is an error, NULL is returned. The size of the record is returned
266 : in sz_out. */
267 :
268 : fd_funk_rec_t const *
269 : fd_funk_rec_query_copy( fd_funk_t * funk,
270 : fd_funk_txn_t const * txn,
271 : fd_funk_rec_key_t const * key,
272 : fd_valloc_t valloc,
273 : ulong * sz_out );
274 :
275 : /* fd_funk_rec_{pair,xid,key} returns a pointer in the local address
276 : space of the {(transaction id,record key) pair,transaction id,record
277 : key} of a live record. Assumes rec points to a live record in the
278 : caller's address space. The lifetime of the returned pointer is the
279 : same as rec. The value at the pointer will be constant for its
280 : lifetime. */
281 :
282 20582349 : FD_FN_CONST static inline fd_funk_xid_key_pair_t const * fd_funk_rec_pair( fd_funk_rec_t const * rec ) { return &rec->pair; }
283 937383108 : FD_FN_CONST static inline fd_funk_txn_xid_t const * fd_funk_rec_xid ( fd_funk_rec_t const * rec ) { return rec->pair.xid; }
284 1251683460 : FD_FN_CONST static inline fd_funk_rec_key_t const * fd_funk_rec_key ( fd_funk_rec_t const * rec ) { return rec->pair.key; }
285 :
286 : /* fd_funk_rec_prepare prepares to insert a new record. This call just
287 : allocates a record from the pool and initializes it.
288 : The application should then fill in the new
289 : value. fd_funk_rec_publish actually does the map insert and
290 : should be called once the value is correct. */
291 :
292 : fd_funk_rec_t *
293 : fd_funk_rec_prepare( fd_funk_t * funk,
294 : fd_funk_txn_t * txn,
295 : fd_funk_rec_key_t const * key,
296 : fd_funk_rec_prepare_t * prepare,
297 : int * opt_err );
298 :
299 : /* fd_funk_rec_publish inserts a prepared record into the record map. */
300 :
301 : void
302 : fd_funk_rec_publish( fd_funk_t * funk,
303 : fd_funk_rec_prepare_t * prepare );
304 :
305 : /* fd_funk_rec_cancel returns a prepared record to the pool without
306 : inserting it. */
307 :
308 : void
309 : fd_funk_rec_cancel( fd_funk_t * funk,
310 : fd_funk_rec_prepare_t * prepare );
311 :
312 : /* fd_funk_rec_clone copies a record from an ancestor transaction
313 : to create a new record in the given transaction. The record can be
314 : modified afterward and must then be published.
315 :
316 : NOTE: fd_funk_rec_clone is NOT thread safe and should not be used
317 : concurrently with other funk read/write operations.
318 :
319 : FIXME: This function should be removed in favor of
320 : fd_funk_rec_try_clone_safe. */
321 :
322 : fd_funk_rec_t *
323 : fd_funk_rec_clone( fd_funk_t * funk,
324 : fd_funk_txn_t * txn,
325 : fd_funk_rec_key_t const * key,
326 : fd_funk_rec_prepare_t * prepare,
327 : int * opt_err );
328 :
329 : /* fd_funk_rec_try_clone_safe is the thread-safe analog to
330 : fd_funk_rec_clone. This function will try to atomically query and
331 : copy the given funk record from the youngest ancestor transaction
332 : of the given txn and copy it into a new record of the same key into
333 : the current funk txn.
334 :
335 : Detailed Behavior:
336 :
337 : More specifically, first this function will query the transaction
338 : stack to identify what the youngest transaction with the key is. If
339 : a record is found in the current transaction then it will exit.
340 : In the case where a record is found in some ancestor txn or if the
341 : record doesn't exist, the function will then acquire a lock on the
342 : keypair of the youngest ancestor account (if it exists) and the
343 : keypair of the account we want to create. These two keys are
344 : guaranteed to be on the same hash chain so in practice we will just
345 : be locking the hash chain for the key.
346 :
347 : Once this lock is acquired, we will query the keypair for the keypair
348 : we are going to create to make sure that it wasn't added in the time
349 : that we were attempting to acquire the lock. If a keypair is found,
350 : we will free the lock and exit the function.
351 :
352 : Otherwise, we will allocate the account and copy over the data from
353 : the ancestor record. Now we will add this into the record map. At
354 : this point, we can now free the lock on the hash chain.
355 :
356 : The caller can specify the alignment and min_sz they would like for
357 : the value of the record. If the caller wishes to use default
358 : alignment, they can pass 0UL (see fd_funk_val_truncate() for more
359 : details). */
360 :
361 : void
362 : fd_funk_rec_try_clone_safe( fd_funk_t * funk,
363 : fd_funk_txn_t * txn,
364 : fd_funk_rec_key_t const * key,
365 : ulong align,
366 : ulong min_sz );
367 :
368 :
369 : /* fd_funk_rec_remove removes the live record with the
370 : given (xid,key) from funk. Returns FD_FUNK_SUCCESS (0) on
371 : success and an FD_FUNK_ERR_* (negative) on failure. Reasons for
372 : failure include:
373 :
374 : FD_FUNK_ERR_INVAL - bad inputs (NULL funk, NULL xid)
375 :
376 : FD_FUNK_ERR_KEY - the record did not appear to be a live record.
377 : Specifically, a record query of funk for rec's (xid,key) pair did
378 : not return rec.
379 :
380 : The record will cease to exist in that transaction and any of
381 : transaction's subsequently created descendants (again, assuming no
382 : subsequent insert of key). This type of remove can be done on a
383 : published record (assuming the last published transaction is
384 : unfrozen). A tombstone is left in funk to track removals as they
385 : are published or cancelled.
386 :
387 : Any information in an erased record is lost.
388 :
389 : This is a reasonably fast O(1) and fortified against memory
390 : corruption. */
391 :
392 : int
393 : fd_funk_rec_remove( fd_funk_t * funk,
394 : fd_funk_txn_t * txn,
395 : fd_funk_rec_key_t const * key,
396 : fd_funk_rec_t ** rec_out,
397 : ulong erase_data );
398 :
399 : /*
400 : fd_funk_rec_hard_remove completely removes the record from Funk,
401 : and leaves no tombstone behind.
402 :
403 : This is a dangerous API. An older version of the record in a
404 : parent transaction might be exposed. In other words, the record may
405 : appear to go backwards in time. We are effectively reverting an
406 : update. Any information in an removed record is lost.
407 :
408 : Always succeeds.
409 : */
410 : void
411 : fd_funk_rec_hard_remove( fd_funk_t * funk,
412 : fd_funk_txn_t * txn,
413 : fd_funk_rec_key_t const * key );
414 :
415 : /* When a record is erased there is metadata stored in the five most
416 : significant bytes of record flags. These are helpers to make setting
417 : and getting these values simple. The caller is responsible for doing
418 : a check on the flag of the record before using the value of the erase
419 : data. The 5 least significant bytes of the erase data parameter will
420 : be used and set into the erase flag. */
421 :
422 : void
423 : fd_funk_rec_set_erase_data( fd_funk_rec_t * rec, ulong erase_data );
424 :
425 : ulong
426 : fd_funk_rec_get_erase_data( fd_funk_rec_t const * rec );
427 :
428 : /* Remove a list of tombstones from funk, thereby freeing up space in
429 : the main index. All the records must be removed and published
430 : beforehand. Reasons for failure include:
431 :
432 : FD_FUNK_ERR_INVAL - bad inputs (NULL funk, NULL rec, rec is
433 : obviously not from funk, etc)
434 :
435 : FD_FUNK_ERR_KEY - the record did not appear to be a removed record.
436 : Specifically, a record query of funk for rec's (xid,key) pair did
437 : not return rec. Also, the record was never published.
438 : */
439 : int
440 : fd_funk_rec_forget( fd_funk_t * funk,
441 : fd_funk_rec_t ** recs,
442 : ulong recs_cnt );
443 :
444 : /* fd_funk_all_iter_t iterators over all funk record objects in all funk
445 : transactions.
446 :
447 : Assumes that no other join is doing funk write accesses during the
448 : lifetime of the iterator object. This API is not optimized for
449 : performance and has a high fixed cost (slow even for empty DBs).
450 :
451 : Usage is:
452 :
453 : fd_funk_all_iter_t iter[1];
454 : for( fd_funk_all_iter_new( funk, iter ); !fd_funk_all_iter_done( iter ); fd_funk_all_iter_next( iter ) ) {
455 : fd_funk_rec_t const * rec = fd_funk_all_iter_ele_const( iter );
456 : ...
457 : } */
458 :
459 : struct fd_funk_all_iter {
460 : fd_funk_rec_map_t rec_map;
461 : ulong chain_cnt;
462 : ulong chain_idx;
463 : fd_funk_rec_map_iter_t rec_map_iter;
464 : };
465 :
466 : typedef struct fd_funk_all_iter fd_funk_all_iter_t;
467 :
468 : void fd_funk_all_iter_new( fd_funk_t * funk, fd_funk_all_iter_t * iter );
469 :
470 : int fd_funk_all_iter_done( fd_funk_all_iter_t * iter );
471 :
472 : void fd_funk_all_iter_next( fd_funk_all_iter_t * iter );
473 :
474 : fd_funk_rec_t const * fd_funk_all_iter_ele_const( fd_funk_all_iter_t * iter );
475 :
476 : fd_funk_rec_t * fd_funk_all_iter_ele( fd_funk_all_iter_t * iter );
477 :
478 : /* Misc */
479 :
480 : /* fd_funk_rec_verify verifies the record map. Returns FD_FUNK_SUCCESS
481 : if the record map appears intact and FD_FUNK_ERR_INVAL if not (logs
482 : details). Meant to be called as part of fd_funk_verify. As such, it
483 : assumes funk is non-NULL, fd_funk_{wksp,txn_map,rec_map} have been
484 : verified to work and the txn_map has been verified. */
485 :
486 : int
487 : fd_funk_rec_verify( fd_funk_t * funk );
488 :
489 : FD_PROTOTYPES_END
490 :
491 : #endif /* HEADER_fd_src_funk_fd_funk_rec_h */
|