Line data Source code
1 : #ifndef HEADER_fd_src_funk_fd_funk_rec_h
2 : #define HEADER_fd_src_funk_fd_funk_rec_h
3 :
4 : /* This provides APIs for managing funk records. It is generally not
5 : meant to be included directly. Use fd_funk.h instead.
6 :
7 : The following APIs are thread safe and can be interleaved arbirarily
8 : across threads:
9 : fd_funk_rec_query_try
10 : fd_funk_rec_query_test
11 : fd_funk_rec_query_try_global
12 : fd_funk_rec_prepare
13 : fd_funk_rec_publish
14 : fd_funk_rec_cancel
15 : fd_funk_rec_remove
16 : */
17 :
18 : #include "fd_funk_txn.h" /* Includes fd_funk_base.h */
19 :
20 : /* FD_FUNK_REC_{ALIGN,FOOTPRINT} describe the alignment and footprint of
21 : a fd_funk_rec_t. ALIGN will be a power of 2, footprint will be a
22 : multiple of align. These are provided to facilitate compile time
23 : declarations. */
24 :
25 : #define FD_FUNK_REC_ALIGN (64UL)
26 :
27 : /* FD_FUNK_REC_FLAG_* are flags that can be bit-ored together to specify
28 : how records are to be interpreted. The 5 most significant bytes of a
29 : rec's flag are reserved to be used in conjunction with the ERASE flag.
30 :
31 : - ERASE indicates a record in an in-preparation transaction should be
32 : erased if and when the in-preparation transaction is published. If
33 : set on a published record, it serves as a tombstone.
34 : If set, there will be no value resources used by this record. */
35 :
36 18799170 : #define FD_FUNK_REC_FLAG_ERASE (1UL<<0)
37 :
38 : /* FD_FUNK_REC_IDX_NULL gives the map record idx value used to represent
39 : NULL. This value also set a limit on how large rec_max can be. */
40 :
41 1141302060 : #define FD_FUNK_REC_IDX_NULL (UINT_MAX)
42 :
43 : /* A fd_funk_rec_t describes a funk record. */
44 :
45 : struct __attribute__((aligned(FD_FUNK_REC_ALIGN))) fd_funk_rec {
46 :
47 : /* These fields are managed by the funk's rec_map */
48 :
49 : fd_funk_xid_key_pair_t pair; /* Transaction id and record key pair */
50 : uint map_next; /* Internal use by map */
51 : ulong map_hash; /* Internal use by map */
52 :
53 : /* These fields are managed by funk. TODO: Consider using record
54 : index compression here (much more debatable than in txn itself). */
55 :
56 : uint prev_idx; /* Record map index of previous record in its transaction */
57 : uint next_idx; /* Record map index of next record in its transaction */
58 :
59 : uint txn_cidx; /* Compressed transaction map index (or compressed FD_FUNK_TXN_IDX if this is in the last published) */
60 : uint tag; /* Internal use only */
61 : ulong flags; /* Flags that indicate how to interpret a record */
62 :
63 : /* Note: use of uint here requires FD_FUNK_REC_VAL_MAX to be at most
64 : UINT_MAX. */
65 :
66 : uint val_sz; /* Num bytes in record value, in [0,val_max] */
67 : uint val_max; /* Max byte in record value, in [0,FD_FUNK_REC_VAL_MAX], 0 if erase flag set or val_gaddr is 0 */
68 : ulong val_gaddr; /* Wksp gaddr on record value if any, 0 if erase flag set or val_max is 0
69 : If non-zero, the region [val_gaddr,val_gaddr+val_max) will be a current fd_alloc allocation (such that it is
70 : has tag wksp_tag) and the owner of the region will be the record. The allocator is
71 : fd_funk_alloc(). IMPORTANT! HAS NO GUARANTEED ALIGNMENT! */
72 :
73 : /* Padding to FD_FUNK_REC_ALIGN here */
74 : };
75 :
76 : typedef struct fd_funk_rec fd_funk_rec_t;
77 :
78 : FD_STATIC_ASSERT( sizeof(fd_funk_rec_t) == 2U*FD_FUNK_REC_ALIGN, record size is wrong );
79 :
80 : /* fd_funk_rec_map allows for indexing records by their (xid,key) pair.
81 : It is used to store all records of the last published transaction and
82 : the records being updated for a transaction that is in-preparation.
83 : Published records are stored under the pair (root,key). (This is
84 : done so that publishing a transaction doesn't require updating all
85 : transaction id of all the records that were not updated by the
86 : publish.) */
87 :
88 : #define POOL_NAME fd_funk_rec_pool
89 : #define POOL_ELE_T fd_funk_rec_t
90 : #define POOL_IDX_T uint
91 : #define POOL_NEXT map_next
92 : #define POOL_IMPL_STYLE 1
93 : #include "../util/tmpl/fd_pool_para.c"
94 :
95 : #define MAP_NAME fd_funk_rec_map
96 112088969 : #define MAP_ELE_T fd_funk_rec_t
97 : #define MAP_KEY_T fd_funk_xid_key_pair_t
98 : #define MAP_KEY pair
99 782454576 : #define MAP_KEY_EQ(k0,k1) fd_funk_xid_key_pair_eq((k0),(k1))
100 519463735 : #define MAP_KEY_HASH(k0,seed) fd_funk_xid_key_pair_hash((k0),(seed))
101 : #define MAP_IDX_T uint
102 112088969 : #define MAP_NEXT map_next
103 : #define MAP_MEMO map_hash
104 : #define MAP_MAGIC (0xf173da2ce77ecdb0UL) /* Firedancer rec db version 0 */
105 : #define MAP_MEMOIZE 1
106 : #define MAP_IMPL_STYLE 1
107 : #include "../util/tmpl/fd_map_chain_para.c"
108 : #undef MAP_MEMOIZE
109 : #undef MAP_HASH
110 :
111 : typedef fd_funk_rec_map_query_t fd_funk_rec_query_t;
112 :
113 : /* fd_funk_rec_prepare_t represents a new record that has been
114 : prepared but not inserted into the map yet. See documentation for
115 : fd_funk_rec_prepare. */
116 :
117 : struct _fd_funk_rec_prepare {
118 : fd_funk_rec_t * rec;
119 : uint * rec_head_idx;
120 : uint * rec_tail_idx;
121 : uchar * txn_lock;
122 : };
123 :
124 : typedef struct _fd_funk_rec_prepare fd_funk_rec_prepare_t;
125 :
126 : FD_PROTOTYPES_BEGIN
127 :
128 : /* fd_funk_rec_idx_is_null returns 1 if idx is FD_FUNK_REC_IDX_NULL and
129 : 0 otherwise. */
130 :
131 1069589098 : FD_FN_CONST static inline int fd_funk_rec_idx_is_null( uint idx ) { return idx==FD_FUNK_REC_IDX_NULL; }
132 :
133 : /* Accessors */
134 :
135 : /* fd_funk_rec_modify attempts to modify the record corresponding to the
136 : given key in the given transaction. If the record does not exist,
137 : NULL will be returned. If the txn is NULL, the query will be done
138 : against funk's last published transaction (the root). On success,
139 : a mutable pointer to the funk record is returned.
140 :
141 : Assumes funk is a current local join (NULL returns NULL), txn is NULL
142 : or points to an in-preparation transaction in the caller's address
143 : space, key points to a record key in the caller's address space (NULL
144 : returns NULL). It is SAFE to do concurrent operations on funk with
145 : fd_funk_rec_modify.
146 :
147 : If there is contention for this record (or any records that are
148 : hashed to same chain as this record), the function will block the
149 : caller until the contention is resolved.
150 :
151 : A call to fd_funk_rec_modify must be followed by a call to
152 : fd_funk_rec_modify_publish.
153 :
154 : The query argument remembers the query for later validity testing.
155 :
156 : Important safety tips:
157 :
158 : 1. This function can encounter records that have the ERASE flag set
159 : (i.e. are tombstones of erased records). fd_funk_rec_query_try will
160 : still return the record in this case, and the application should
161 : check for the flag.
162 :
163 : 2. This function will not error if a caller attempts to modify a
164 : record from a non-current transaction (i.e. any funk transaction
165 : with a child). However, the caller should NEVER do this as it
166 : violates funk's invariants. */
167 :
168 : fd_funk_rec_t *
169 : fd_funk_rec_modify( fd_funk_t * funk,
170 : fd_funk_txn_t const * txn,
171 : fd_funk_rec_key_t const * key,
172 : fd_funk_rec_query_t * query );
173 :
174 : /* fd_funk_rec_modify_publish commits any modifications to the record
175 : done by fd_funk_rec_modify. All notes from fd_funk_rec_modify
176 : apply. Calling fd_funk_rec_modify_publish is required and is
177 : responsible for freeing the lock on the record (and the hash
178 : chain). */
179 :
180 : void
181 : fd_funk_rec_modify_publish( fd_funk_rec_query_t * query );
182 :
183 :
184 : /* fd_funk_rec_query_try queries the in-preparation transaction pointed to
185 : by txn for the record whose key matches the key pointed to by key.
186 : If txn is NULL, the query will be done for the funk's last published
187 : transaction. Returns a pointer to current record on success and NULL
188 : on failure. Reasons for failure include txn is neither NULL nor a
189 : pointer to a in-preparation transaction, key is NULL or not a record
190 : in the given transaction.
191 :
192 : The returned pointer is in the caller's address space if the
193 : return value is non-NULL.
194 :
195 : Assumes funk is a current local join (NULL returns NULL), txn is NULL
196 : or points to an in-preparation transaction in the caller's address
197 : space, key points to a record key in the caller's address space (NULL
198 : returns NULL), and no concurrent operations on funk, txn or key.
199 : funk retains no interest in key. The funk retains ownership of any
200 : returned record.
201 :
202 : The query argument remembers the query for later validity testing.
203 :
204 : This is reasonably fast O(1).
205 :
206 : Important safety tip! This function can encounter records
207 : that have the ERASE flag set (i.e. are tombstones of erased
208 : records). fd_funk_rec_query_try will still return the record in this
209 : case, and the application should check for the flag. */
210 :
211 : fd_funk_rec_t const *
212 : fd_funk_rec_query_try( fd_funk_t * funk,
213 : fd_funk_txn_t const * txn,
214 : fd_funk_rec_key_t const * key,
215 : fd_funk_rec_query_t * query );
216 :
217 : /* fd_funk_rec_query_test returns SUCCESS if a prior query still has a
218 : valid result. The coding pattern is:
219 :
220 : for(;;) {
221 : fd_funk_rec_query_t query[1];
222 : fd_funk_rec_t * rec = fd_funk_rec_query_try( funk, txn, key, query );
223 : ... Optimistically read record value ...
224 : if( fd_funk_rec_query_test( query ) == FD_FUNK_SUCCESS ) break;
225 : ... Clean up and try again ...
226 : }
227 : */
228 :
229 : int fd_funk_rec_query_test( fd_funk_rec_query_t * query );
230 :
231 : /* fd_funk_rec_query_try_global is the same as fd_funk_rec_query_try but
232 : will query txn's ancestors for key from youngest to oldest if key is
233 : not part of txn. As such, the txn of the returned record may not
234 : match txn but will be the txn of most recent ancestor with the key
235 : otherwise. *txn_out is set to the transaction where the record was
236 : found.
237 :
238 : This is reasonably fast O(in_prep_ancestor_cnt).
239 :
240 : Important safety tip! This function can encounter records
241 : that have the ERASE flag set (i.e. are tombstones of erased
242 : records). fd_funk_rec_query_try_global will return a NULL in this case
243 : but still set *txn_out to the relevant transaction. This behavior
244 : differs from fd_funk_rec_query_try.
245 :
246 : TODO: This function should be renamed to fd_funk_rec_query_try
247 : and fd_funk_rec_query_try should be renamed to
248 : fd_funk_rec_query_try_strict. */
249 : fd_funk_rec_t const *
250 : fd_funk_rec_query_try_global( fd_funk_t const * funk,
251 : fd_funk_txn_t const * txn,
252 : fd_funk_rec_key_t const * key,
253 : fd_funk_txn_t const ** txn_out,
254 : fd_funk_rec_query_t * query );
255 :
256 : /* fd_funk_rec_query_copy queries the in-preparation transaction pointed to
257 : by txn for the record whose key matches the key pointed to by key.
258 :
259 : The contents of the record are safely copied into space allocated
260 : with the valloc, and a pointer to that space is returned. If there
261 : is an error, NULL is returned. The size of the record is returned
262 : in sz_out. */
263 :
264 : fd_funk_rec_t const *
265 : fd_funk_rec_query_copy( fd_funk_t * funk,
266 : fd_funk_txn_t const * txn,
267 : fd_funk_rec_key_t const * key,
268 : fd_valloc_t valloc,
269 : ulong * sz_out );
270 :
271 : /* fd_funk_rec_{pair,xid,key} returns a pointer in the local address
272 : space of the {(transaction id,record key) pair,transaction id,record
273 : key} of a live record. Assumes rec points to a live record in the
274 : caller's address space. The lifetime of the returned pointer is the
275 : same as rec. The value at the pointer will be constant for its
276 : lifetime. */
277 :
278 19065713 : FD_FN_CONST static inline fd_funk_xid_key_pair_t const * fd_funk_rec_pair( fd_funk_rec_t const * rec ) { return &rec->pair; }
279 937383108 : FD_FN_CONST static inline fd_funk_txn_xid_t const * fd_funk_rec_xid ( fd_funk_rec_t const * rec ) { return rec->pair.xid; }
280 1251683460 : FD_FN_CONST static inline fd_funk_rec_key_t const * fd_funk_rec_key ( fd_funk_rec_t const * rec ) { return rec->pair.key; }
281 :
282 : /* fd_funk_rec_prepare prepares to insert a new record. This call just
283 : allocates a record from the pool and initializes it.
284 : The application should then fill in the new
285 : value. fd_funk_rec_publish actually does the map insert and
286 : should be called once the value is correct. */
287 :
288 : fd_funk_rec_t *
289 : fd_funk_rec_prepare( fd_funk_t * funk,
290 : fd_funk_txn_t * txn,
291 : fd_funk_rec_key_t const * key,
292 : fd_funk_rec_prepare_t * prepare,
293 : int * opt_err );
294 :
295 : /* fd_funk_rec_publish inserts a prepared record into the record map. */
296 :
297 : void
298 : fd_funk_rec_publish( fd_funk_t * funk,
299 : fd_funk_rec_prepare_t * prepare );
300 :
301 : /* fd_funk_rec_cancel returns a prepared record to the pool without
302 : inserting it. */
303 :
304 : void
305 : fd_funk_rec_cancel( fd_funk_t * funk,
306 : fd_funk_rec_prepare_t * prepare );
307 :
308 : /* fd_funk_rec_clone copies a record from an ancestor transaction
309 : to create a new record in the given transaction. The record can be
310 : modified afterward and must then be published.
311 :
312 : NOTE: fd_funk_rec_clone is NOT thread safe and should not be used
313 : concurrently with other funk read/write operations.
314 :
315 : FIXME: This function should be removed in favor of
316 : fd_funk_rec_try_clone_safe. */
317 :
318 : fd_funk_rec_t *
319 : fd_funk_rec_clone( fd_funk_t * funk,
320 : fd_funk_txn_t * txn,
321 : fd_funk_rec_key_t const * key,
322 : fd_funk_rec_prepare_t * prepare,
323 : int * opt_err );
324 :
325 : /* fd_funk_rec_try_clone_safe is the thread-safe analog to
326 : fd_funk_rec_clone. This function will try to atomically query and
327 : copy the given funk record from the youngest ancestor transaction
328 : of the given txn and copy it into a new record of the same key into
329 : the current funk txn.
330 :
331 : Detailed Behavior:
332 :
333 : More specifically, first this function will query the transaction
334 : stack to identify what the youngest transaction with the key is. If
335 : a record is found in the current transaction then it will exit.
336 : In the case where a record is found in some ancestor txn or if the
337 : record doesn't exist, the function will then acquire a lock on the
338 : keypair of the youngest ancestor account (if it exists) and the
339 : keypair of the account we want to create. These two keys are
340 : guaranteed to be on the same hash chain so in practice we will just
341 : be locking the hash chain for the key.
342 :
343 : Once this lock is acquired, we will query the keypair for the keypair
344 : we are going to create to make sure that it wasn't added in the time
345 : that we were attempting to acquire the lock. If a keypair is found,
346 : we will free the lock and exit the function.
347 :
348 : Otherwise, we will allocate the account and copy over the data from
349 : the ancestor record. Now we will add this into the record map. At
350 : this point, we can now free the lock on the hash chain.
351 :
352 : The caller can specify the alignment and min_sz they would like for
353 : the value of the record. If the caller wishes to use default
354 : alignment, they can pass 0UL (see fd_funk_val_truncate() for more
355 : details). */
356 :
357 : void
358 : fd_funk_rec_try_clone_safe( fd_funk_t * funk,
359 : fd_funk_txn_t * txn,
360 : fd_funk_rec_key_t const * key,
361 : ulong align,
362 : ulong min_sz );
363 :
364 :
365 : /* fd_funk_rec_remove removes the live record with the
366 : given (xid,key) from funk. Returns FD_FUNK_SUCCESS (0) on
367 : success and an FD_FUNK_ERR_* (negative) on failure. Reasons for
368 : failure include:
369 :
370 : FD_FUNK_ERR_INVAL - bad inputs (NULL funk, NULL xid)
371 :
372 : FD_FUNK_ERR_KEY - the record did not appear to be a live record.
373 : Specifically, a record query of funk for rec's (xid,key) pair did
374 : not return rec.
375 :
376 : The record will cease to exist in that transaction and any of
377 : transaction's subsequently created descendants (again, assuming no
378 : subsequent insert of key). This type of remove can be done on a
379 : published record (assuming the last published transaction is
380 : unfrozen). A tombstone is left in funk to track removals as they
381 : are published or cancelled.
382 :
383 : Any information in an erased record is lost.
384 :
385 : This is a reasonably fast O(1) and fortified against memory
386 : corruption. */
387 :
388 : int
389 : fd_funk_rec_remove( fd_funk_t * funk,
390 : fd_funk_txn_t * txn,
391 : fd_funk_rec_key_t const * key,
392 : fd_funk_rec_t ** rec_out,
393 : ulong erase_data );
394 :
395 : /*
396 : fd_funk_rec_hard_remove completely removes the record from Funk,
397 : and leaves no tombstone behind.
398 :
399 : This is a dangerous API. An older version of the record in a
400 : parent transaction might be exposed. In other words, the record may
401 : appear to go backwards in time. We are effectively reverting an
402 : update. Any information in an removed record is lost.
403 :
404 : Always succeeds.
405 : */
406 : void
407 : fd_funk_rec_hard_remove( fd_funk_t * funk,
408 : fd_funk_txn_t * txn,
409 : fd_funk_rec_key_t const * key );
410 :
411 : /* When a record is erased there is metadata stored in the five most
412 : significant bytes of record flags. These are helpers to make setting
413 : and getting these values simple. The caller is responsible for doing
414 : a check on the flag of the record before using the value of the erase
415 : data. The 5 least significant bytes of the erase data parameter will
416 : be used and set into the erase flag. */
417 :
418 : void
419 : fd_funk_rec_set_erase_data( fd_funk_rec_t * rec, ulong erase_data );
420 :
421 : ulong
422 : fd_funk_rec_get_erase_data( fd_funk_rec_t const * rec );
423 :
424 : /* Remove a list of tombstones from funk, thereby freeing up space in
425 : the main index. All the records must be removed and published
426 : beforehand. Reasons for failure include:
427 :
428 : FD_FUNK_ERR_INVAL - bad inputs (NULL funk, NULL rec, rec is
429 : obviously not from funk, etc)
430 :
431 : FD_FUNK_ERR_KEY - the record did not appear to be a removed record.
432 : Specifically, a record query of funk for rec's (xid,key) pair did
433 : not return rec. Also, the record was never published.
434 : */
435 : int
436 : fd_funk_rec_forget( fd_funk_t * funk,
437 : fd_funk_rec_t ** recs,
438 : ulong recs_cnt );
439 :
440 : /* fd_funk_all_iter_t iterators over all funk record objects in all funk
441 : transactions.
442 :
443 : Assumes that no other join is doing funk write accesses during the
444 : lifetime of the iterator object. This API is not optimized for
445 : performance and has a high fixed cost (slow even for empty DBs).
446 :
447 : Usage is:
448 :
449 : fd_funk_all_iter_t iter[1];
450 : for( fd_funk_all_iter_new( funk, iter ); !fd_funk_all_iter_done( iter ); fd_funk_all_iter_next( iter ) ) {
451 : fd_funk_rec_t const * rec = fd_funk_all_iter_ele_const( iter );
452 : ...
453 : } */
454 :
455 : struct fd_funk_all_iter {
456 : fd_funk_rec_map_t rec_map;
457 : ulong chain_cnt;
458 : ulong chain_idx;
459 : fd_funk_rec_map_iter_t rec_map_iter;
460 : };
461 :
462 : typedef struct fd_funk_all_iter fd_funk_all_iter_t;
463 :
464 : void fd_funk_all_iter_new( fd_funk_t * funk, fd_funk_all_iter_t * iter );
465 :
466 : int fd_funk_all_iter_done( fd_funk_all_iter_t * iter );
467 :
468 : void fd_funk_all_iter_next( fd_funk_all_iter_t * iter );
469 :
470 : fd_funk_rec_t const * fd_funk_all_iter_ele_const( fd_funk_all_iter_t * iter );
471 :
472 : fd_funk_rec_t * fd_funk_all_iter_ele( fd_funk_all_iter_t * iter );
473 :
474 : /* Misc */
475 :
476 : /* fd_funk_rec_verify verifies the record map. Returns FD_FUNK_SUCCESS
477 : if the record map appears intact and FD_FUNK_ERR_INVAL if not (logs
478 : details). Meant to be called as part of fd_funk_verify. As such, it
479 : assumes funk is non-NULL, fd_funk_{wksp,txn_map,rec_map} have been
480 : verified to work and the txn_map has been verified. */
481 :
482 : int
483 : fd_funk_rec_verify( fd_funk_t * funk );
484 :
485 : int
486 : fd_funk_rec_purify( fd_funk_t * funk );
487 :
488 : FD_PROTOTYPES_END
489 :
490 : #endif /* HEADER_fd_src_funk_fd_funk_rec_h */
|