Line data Source code
1 : #ifndef HEADER_fd_src_funk_fd_funk_txn_h
2 : #define HEADER_fd_src_funk_fd_funk_txn_h
3 :
4 : /* This provides APIs for managing forks (preparing, publishing and
5 : cancelling funk transactions). It is generally not meant to be
6 : included directly. Use fd_funk.h instead. */
7 :
8 : #include "fd_funk_base.h"
9 :
10 : /* FD_FUNK_TXN_{ALIGN,FOOTPRINT} describe the alignment and footprint of
11 : a fd_funk_txn_t. ALIGN will be a power of 2, footprint will be a
12 : multiple of align. These are provided to facilitate compile time
13 : declarations. */
14 :
15 : #define FD_FUNK_TXN_ALIGN (32UL)
16 : #define FD_FUNK_TXN_FOOTPRINT (96UL)
17 :
18 : /* FD_FUNK_TXN_IDX_NULL gives the map transaction idx value used to
19 : represent NULL. It also is the maximum value for txn_max in a funk
20 : to support index compression. (E.g. could use ushort / USHORT_MAX
21 : to use more aggressive compression or ulong / ULONG_MAX to disable
22 : index compression.) */
23 :
24 3663617724 : #define FD_FUNK_TXN_IDX_NULL ((ulong)UINT_MAX)
25 :
26 : /* A fd_funk_txn_t is an opaque handle of an in-preparation funk
27 : transaction. The details are exposed here to facilitate inlining
28 : various operations. */
29 :
30 : struct __attribute__((aligned(FD_FUNK_TXN_ALIGN))) fd_funk_txn_private {
31 :
32 : /* These fields are managed by the funk's txn_map */
33 :
34 : fd_funk_txn_xid_t xid; /* Transaction id, at a minimum, unique among all in-prepare and the last published transaction,
35 : ideally globally unique */
36 : ulong map_next; /* Internal use by map */
37 : ulong map_hash; /* Internal use by map */
38 :
39 : /* These fields are managed by the funk */
40 :
41 : uint parent_cidx; /* compr map index of the in-prep parent txn, compr FD_FUNK_TXN_IDX_NULL if a funk child */
42 : uint child_head_cidx; /* " oldest child " childless */
43 : uint child_tail_cidx; /* " youngest child childless */
44 : uint sibling_prev_cidx; /* " older sibling oldest sibling */
45 : uint sibling_next_cidx; /* " younger sibling youngest sibling */
46 : uint stack_cidx; /* Internal use by funk */
47 : ulong tag; /* Internal use by funk */
48 :
49 : ulong rec_head_idx; /* Record map index of the first record, FD_FUNK_REC_IDX_NULL if none (from oldest to youngest) */
50 : ulong rec_tail_idx; /* " last " */
51 : };
52 :
53 : typedef struct fd_funk_txn_private fd_funk_txn_t;
54 :
55 : /* fd_funk_txn_map allows for indexing transactions by their xid */
56 :
57 : #define MAP_NAME fd_funk_txn_map
58 : #define MAP_T fd_funk_txn_t
59 : #define MAP_KEY_T fd_funk_txn_xid_t
60 : #define MAP_KEY xid
61 601618626 : #define MAP_KEY_EQ(k0,k1) fd_funk_txn_xid_eq((k0),(k1))
62 96730983 : #define MAP_KEY_HASH(k0,seed) fd_funk_txn_xid_hash((k0),(seed))
63 3758337 : #define MAP_KEY_COPY(kd,ks) fd_funk_txn_xid_copy((kd),(ks))
64 1510351872 : #define MAP_NEXT map_next
65 : #define MAP_HASH map_hash
66 : #define MAP_MAGIC (0xf173da2ce7172db0UL) /* Firedancer trn db version 0 */
67 : #define MAP_IMPL_STYLE 1
68 : #define MAP_MEMOIZE 1
69 : #include "../util/tmpl/fd_map_giant.c"
70 :
71 : FD_PROTOTYPES_BEGIN
72 :
73 : /* fd_funk_txn_{cidx,idx} convert between an index and a compressed index. */
74 :
75 253256484 : static inline uint fd_funk_txn_cidx( ulong idx ) { return (uint) idx; }
76 5293642887 : static inline ulong fd_funk_txn_idx ( uint idx ) { return (ulong)idx; }
77 :
78 : /* fd_funk_txn_idx_is_null returns 1 if idx is FD_FUNK_TXN_IDX_NULL and
79 : 0 otherwise. */
80 :
81 2929363572 : static inline int fd_funk_txn_idx_is_null( ulong idx ) { return idx==FD_FUNK_TXN_IDX_NULL; }
82 :
83 : /* Generate a globally unique pseudo-random xid */
84 : fd_funk_txn_xid_t fd_funk_generate_xid(void);
85 :
86 : /* Accessors */
87 :
88 : /* fd_funk_txn_cnt returns the number of transactions currently in
89 : preparation. Assumes funk is a current local join,
90 : map==fd_funk_txn_map( funk, fd_funk_wksp( funk ) ). See fd_funk.h
91 : for fd_funk_txn_max. */
92 :
93 25420839 : FD_FN_PURE static inline ulong fd_funk_txn_cnt( fd_funk_txn_t const * map ) { return fd_funk_txn_map_key_cnt( map ); }
94 :
95 : /* fd_funk_txn_is_full returns 1 if the transaction map is full (i.e.
96 : the maximum of transactions that can be in preparation has been
97 : reached) and 0 otherwise. Assumes funk is a current local join,
98 : map==fd_funk_txn_map( funk, fd_funk_wksp( funk ) ). */
99 :
100 4163535 : FD_FN_PURE static inline int fd_funk_txn_is_full( fd_funk_txn_t const * map ) { return fd_funk_txn_map_is_full( map ); }
101 :
102 : /* fd_funk_txn_query returns a pointer to an in-preparation transaction
103 : whose id is pointed to by xid. Returns NULL if xid is not an
104 : in-preparation transaction. Assumes funk is a current local join,
105 : map==fd_funk_txn_map( funk, fd_funk_wksp( funk ) ), xid points to a
106 : transaction id in the caller's address space and there are no
107 : concurrent operations on funk or xid. Retains no interest in xid.
108 :
109 : The returned pointer is in the caller's address space and, if the
110 : return is non-NULL, the lifetime of the returned pointer is the
111 : lesser of the funk join or the transaction is published or canceled
112 : (either directly or indirectly via publish of a descendant, publish
113 : of a competing transaction history or cancel of an ancestor).
114 :
115 : Callers wanting more control over queries (e.g. concurrent queries,
116 : sentinel transactions on failure, queries that don't optimize for
117 : future queries by the same xid, etc) should use fd_funk_txn_map_query
118 : or fd_funk_txn_map_query_const as appropriate. */
119 :
120 : FD_FN_PURE static inline fd_funk_txn_t *
121 : fd_funk_txn_query( fd_funk_txn_xid_t const * xid,
122 482434872 : fd_funk_txn_t * map ) {
123 482434872 : return fd_funk_txn_map_query( map, xid, NULL );
124 482434872 : }
125 :
126 : /* fd_funk_txn_xid returns a pointer in the local address space of the
127 : ID of an in-preparation transaction. Assumes txn points to an
128 : in-preparation transaction in the caller's address space. The
129 : lifetime of the returned pointer is the same as the txn's pointer
130 : lifetime. The value at the pointer will be stable for the lifetime
131 : of the returned pointer. */
132 :
133 659313510 : FD_FN_CONST static inline fd_funk_txn_xid_t const * fd_funk_txn_xid( fd_funk_txn_t const * txn ) { return &txn->xid; }
134 :
135 : /* fd_funk_txn_{parent,child_head,child_tail,sibling_prev,sibling_next}
136 : return a pointer in the caller's address space to the corresponding
137 : relative in-preparation transaction of in-preparation transaction
138 : txn. Assumes map == fd_funk_txn_map( funk, fd_funk_wksp( funk ) ),
139 : funk is a current local join and txn points to an in-preparation funk
140 : transaction in the caller's address space. The returned pointer
141 : lifetime and address space is as described in fd_funk_txn_query.
142 : These are not fortified against map data corruption.
143 :
144 : Specifically:
145 :
146 : - parent is the parent transaction. NULL if txn is a child of funk.
147 : - child_head is txn's oldest child. NULL if txn has no children.
148 : - child_tail is txn's youngest child. NULL if txn has no children.
149 : - sibling_prev is txn's closest older sibling. NULL if txn is the
150 : oldest sibling.
151 : - sibling_next is txn's closest younger sibling. NULL if txn is the
152 : youngest sibling.
153 :
154 : E.g. child_head==NULL indicates txn has no children.
155 : child_head!=NULL and child_head==child_tail indicates txn has one
156 : child. child_head!=NULL and child_tail!=NULL and
157 : child_head!=child_tail indicates txn has many children.
158 : sibling_prev==sibling_next==NULL indicate no siblings / locally
159 : competing transactions to txn. If the txn and all its ancestors have
160 : no siblings, there are no transaction histories competing with txn
161 : globally. */
162 :
163 : #define FD_FUNK_ACCESSOR(field) \
164 : FD_FN_PURE static inline fd_funk_txn_t * \
165 : fd_funk_txn_##field( fd_funk_txn_t const * txn, \
166 365777979 : fd_funk_txn_t * map ) { \
167 365777979 : ulong idx = fd_funk_txn_idx( txn->field##_cidx ); \
168 365777979 : if( idx==FD_FUNK_TXN_IDX_NULL ) return NULL; \
169 365777979 : return map + idx; \
170 365777979 : }
171 :
172 : FD_FUNK_ACCESSOR( parent )
173 : FD_FUNK_ACCESSOR( child_head )
174 : FD_FUNK_ACCESSOR( child_tail )
175 : FD_FUNK_ACCESSOR( sibling_prev )
176 : FD_FUNK_ACCESSOR( sibling_next )
177 :
178 : #undef FD_FUNK_ACCESSOR
179 :
180 : /* fd_funk_txn_frozen returns 1 if the in-preparation transaction is
181 : frozen (i.e. has children) and 0 otherwise (i.e. has no children).
182 : Assumes txn points to an in-preparation transaction in the caller's
183 : address space. */
184 :
185 : FD_FN_PURE static inline int
186 186944661 : fd_funk_txn_is_frozen( fd_funk_txn_t const * txn ) {
187 186944661 : return !fd_funk_txn_idx_is_null( fd_funk_txn_idx( txn->child_head_cidx ) );
188 186944661 : }
189 :
190 : /* fd_funk_txn_is_only_child returns 1 if the in-preparation transaction
191 : txn if is any only child and 0 if it has one or more siblings.
192 : Assumes txn points to an in-preparation transaction in the caller's
193 : address space. */
194 :
195 : FD_FN_PURE static inline int
196 98314716 : fd_funk_txn_is_only_child( fd_funk_txn_t const * txn ) {
197 98314716 : return ( fd_funk_txn_idx_is_null( fd_funk_txn_idx( txn->sibling_prev_cidx ) ) ) &
198 98314716 : ( fd_funk_txn_idx_is_null( fd_funk_txn_idx( txn->sibling_next_cidx ) ) );
199 98314716 : }
200 :
201 : typedef struct fd_funk_rec fd_funk_rec_t;
202 :
203 : /* Return the first record in a transaction. Returns NULL if the
204 : transaction has no records yet. */
205 :
206 : FD_FN_PURE fd_funk_rec_t const *
207 : fd_funk_txn_first_rec( fd_funk_t * funk,
208 : fd_funk_txn_t const * txn );
209 :
210 : /* Return the next record in a transaction. Returns NULL if the
211 : transaction has no more records. */
212 :
213 : FD_FN_PURE fd_funk_rec_t const *
214 : fd_funk_txn_next_rec( fd_funk_t * funk,
215 : fd_funk_rec_t const * rec );
216 :
217 : /* Operations */
218 :
219 : /* fd_funk_txn_ancestor returns a pointer in the caller's address space
220 : to the youngest transaction among in-preparation transaction txn and
221 : its ancestors that currently has siblings. Returns NULL if all
222 : transactions back to the root transaction have no siblings (e.g.
223 : there are no competing transaction histories and thus publishing
224 : transaction txn will not require canceling any other competing
225 : transactions). This is a reasonably fast O(length of ancestor
226 : history) time (theoretical minimum) and a reasonably small O(1) space
227 : (theoretical minimum). This is not fortified against transaction map
228 : data corruption.
229 :
230 : fd_funk_txn_descendant returns a pointer in the caller's address
231 : space to the first transaction among txn and its youngest direct
232 : descendant inclusive that currently either has no children or has
233 : multiple children. Returns NULL if txn is not an only child. This
234 : is a reasonably fast O(length of descendant history) time
235 : (theoretical minimum) and a reasonably small O(1) space (theoretical
236 : minimum). This is not fortified against transaction map data
237 : corruption.
238 :
239 : That is, if txn's ancestor is NULL, all transactions up to and
240 : including txn's descendant (which will be non-NULL) can be published
241 : without cancelling any competing transactions. Further, if the
242 : descendant has a child, it has multiple children. And if has no
243 : children, all transactions in preparation are linear from the root to
244 : the descendant.
245 :
246 : In code:
247 :
248 : if( !fd_funk_txn_ancestor( txn, map ) )
249 : fd_funk_publish( funk, fd_funk_txn_descendant( funk, map ) );
250 :
251 : would publish as much currently uncontested transaction history as
252 : possible around txn.
253 :
254 : Assumes map == fd_funk_txn_map( funk, fd_funk_wksp( funk ) ), funk is
255 : a current local join and txn points to an in-preparation transaction
256 : of funk in the caller's address space. The lifetime of the returned
257 : pointer is as described in fd_funk_txn_query. */
258 :
259 : FD_FN_PURE static inline fd_funk_txn_t *
260 : fd_funk_txn_ancestor( fd_funk_txn_t * txn,
261 27399642 : fd_funk_txn_t * map ) {
262 36634314 : for(;;) {
263 36634314 : if( !fd_funk_txn_is_only_child( txn ) ) break;
264 10288377 : fd_funk_txn_t * parent = fd_funk_txn_parent( txn, map );
265 10288377 : if( !parent ) return NULL;
266 9234672 : txn = parent;
267 9234672 : }
268 26345937 : return txn;
269 27399642 : }
270 :
271 : FD_FN_PURE static inline fd_funk_txn_t *
272 : fd_funk_txn_descendant( fd_funk_txn_t * txn,
273 30318915 : fd_funk_txn_t * map ) {
274 30318915 : if( !fd_funk_txn_is_only_child( txn ) ) return NULL; /* TODO: debatable, make contract? */
275 11342082 : for(;;) { /* txn is an only child at this point */
276 11342082 : fd_funk_txn_t * child = fd_funk_txn_child_head( txn, map );
277 11342082 : if( !child || !fd_funk_txn_is_only_child( child ) ) break;
278 2613423 : txn = child;
279 2613423 : }
280 8728659 : return txn;
281 30318915 : }
282 :
283 : /* IMPORTANT SAFETY TIP! **********************************************/
284 :
285 : /* fd_funk_txn_{prepare,publish,cancel,cancel_siblings,cancel_children}
286 : are the pointy end of the stick practically. As such, these are
287 : fortified against transaction map memory corruption. Since any such
288 : corruption would be prima facie evidence of a hardware fault,
289 : security compromise or software bug, these will FD_LOG_CRIT if
290 : corruption is detected to contain the blast radius and get as much
291 : ultra detailed diagnostic information to user (stack backtrace and
292 : core) as possible. (This behavior is straightforward to disable in
293 : fd_funk_txn.c.) */
294 :
295 : /**********************************************************************/
296 :
297 : /* fd_funk_txn_prepare starts preparation of a transaction. The
298 : transaction will be a child of the in-preparation transaction pointed
299 : to by parent. A NULL parent means the transaction should be a child
300 : of funk. xid points to transaction id that should be used for the
301 : transaction. This id must be unique over all in-preparation
302 : transactions, the root transaction and the last published
303 : transaction. It is strongly recommended to use globally unique ids
304 : when possible. Returns a pointer in the caller's address space to
305 : the in-preparation transaction on success and NULL on failure. The
306 : lifetime of the returned pointer is as described in
307 : fd_funk_txn_query.
308 :
309 : At start of preparation, the records in the txn are a clone of the
310 : records in its parent transaction. The funk records can be modified
311 : when the funk has no children. Similarly, the records of an
312 : in-preparation transaction can be freely modified when the funk has
313 : no children.
314 :
315 : Assumes funk is a current local join. Reasons for failure include
316 : funk is NULL, the funk's transaction map is full, the parent is
317 : neither NULL nor points to an in-preparation funk transaction, xid is
318 : NULL, the requested xid is in use (i.e. the last published or matches
319 : another in-preparation transaction). If verbose is non-zero, these
320 : will FD_LOG_WARNING details about the reason for failure.
321 :
322 : This is a reasonably fast O(1) time (theoretical minimum), reasonably
323 : small O(1) space (theoretical minimum), does no allocation, does no
324 : system calls, and produces no garbage to collect (at this layer at
325 : least). That is, we can scalably track forks until we run out of
326 : resources allocated to the funk. */
327 :
328 : fd_funk_txn_t *
329 : fd_funk_txn_prepare( fd_funk_t * funk,
330 : fd_funk_txn_t * parent,
331 : fd_funk_txn_xid_t const * xid,
332 : int verbose );
333 :
334 : /* fd_funk_txn_cancel cancels in-preparation transaction txn and any of
335 : its in-preparation descendants. On success, returns the number of
336 : transactions cancelled and 0 on failure. The state of records in the
337 : cancelled transactions will be lost and all resources used under the
338 : hood are available for reuse. If this makes the txn's parent
339 : childless, this will unfreeze the parent.
340 :
341 : fd_funk_txn_cancel_siblings cancels txn's siblings and their
342 : descendants.
343 :
344 : fd_funk_txn_cancel_children cancels txn's children and their
345 : descendants. If txn is NULL, all children of funk will be cancelled
346 : (such that the number of transactions in preparation afterward will
347 : be zero).
348 :
349 : Cancellations proceed from youngest to oldest in a tree depth first
350 : sense.
351 :
352 : Assumes funk is a current local join. Reasons for failure include
353 : NULL funk or txn does not point to an in-preparation funk
354 : transaction. If verbose is non-zero, these will FD_LOG_WARNING level
355 : details about the reason for failure.
356 :
357 : These are a reasonably fast O(number of cancelled transactions) time
358 : (the theoretical minimum), reasonably small O(1) space (the
359 : theoretical minimum), does no allocation, does no system calls, and
360 : produces no garbage to collect (at this layer at least). That is, we
361 : can scalably track forks until we run out of resources allocated to
362 : the funk. */
363 :
364 : ulong
365 : fd_funk_txn_cancel( fd_funk_t * funk,
366 : fd_funk_txn_t * txn,
367 : int verbose );
368 :
369 : ulong
370 : fd_funk_txn_cancel_siblings( fd_funk_t * funk,
371 : fd_funk_txn_t * txn,
372 : int verbose );
373 :
374 : ulong
375 : fd_funk_txn_cancel_children( fd_funk_t * funk,
376 : fd_funk_txn_t * txn,
377 : int verbose );
378 :
379 : ulong
380 : fd_funk_txn_cancel_all( fd_funk_t * funk,
381 : int verbose );
382 :
383 : /* fd_funk_txn_publish publishes in-preparation transaction txn and any
384 : of txn's in-preparation ancestors. Returns the number of
385 : transactions published. Any competing histories to this chain will
386 : be cancelled.
387 :
388 : This follows a principle of least information loss. Specifically,
389 : publications proceed incrementally from the oldest ancestor to txn
390 : inclusive. When a transaction is published, the transaction is first
391 : committed to permanent storage. If this is unsuccessful, the publish
392 : is stopped at this transaction and the transaction remains
393 : unpublished. Otherwise, the transaction's siblings and their
394 : descendants are cancelled.
395 :
396 : As such, it is possible in a funk implementation (e.g. permanent
397 : storage I/O errors) for fd_funk_txn_publish to only publish some of
398 : the ancestors. Partial publication will only happen on error. On
399 : such a failure, no information is lost about the transaction that
400 : failed to publish, its siblings or its descendants. Likewise, all
401 : the failed transaction's ancestors were reliably published. Funk
402 : last publish, query, the various traversals and so forth can be used
403 : to diagnose the details about such situations.
404 :
405 : Assumes funk is a current local join. Reasons for failure include
406 : NULL funk, txn does not point to an in-preparation funk transaction.
407 : If verbose is non-zero, these will FD_LOG_WARNING the details about
408 : the reason for failure.
409 :
410 : This is a reasonably fast O(number of published transactions) +
411 : O(number of cancelled transactions) time (theoretical minimum),
412 : reasonably small O(1) space (theoretical minimum), does no allocation
413 : does no system calls, and produces no garbage to collect (at this
414 : layer at least). That is, we can scalably track forks until we run
415 : out of resources allocated to the funk. */
416 :
417 : ulong
418 : fd_funk_txn_publish( fd_funk_t * funk,
419 : fd_funk_txn_t * txn,
420 : int verbose );
421 :
422 : /* This version of publish just combines the transaction with its
423 : immediate parent. Ancestors will remain unpublished. Any competing
424 : histories (siblings of the given transaction) are still cancelled.
425 :
426 : Returns FD_FUNK_SUCCESS on success or an error code on failure. */
427 : int
428 : fd_funk_txn_publish_into_parent( fd_funk_t * funk,
429 : fd_funk_txn_t * txn,
430 : int verbose );
431 :
432 : /* fd_funk_txn_merge merges all the children into their parent
433 : transaction. The intention is to support gathering small,
434 : short-term transactions into a large transaction. Strictly
435 : speaking, this API isn't required, but a list of small children can
436 : be more efficiently and conveniently managed as a single large
437 : transaction if the intention is to publish all of them at
438 : once. Recall that child transactions can be cancelled due to an
439 : error without affecting the parent transaction, which allows
440 : robust, incremental assembly of a very big transaction.
441 :
442 : The children must not have grandchildren.
443 :
444 : Returns FD_FUNK_SUCCESS on success or an error code on failure.
445 :
446 : Assumes funk is a current local join. Reasons for failure include
447 : NULL funk or txn does not point to an in-preparation funk
448 : transaction. If verbose is non-zero, these will FD_LOG_WARNING level
449 : details about the reason for failure.
450 :
451 : Unfortunately, the semantics of how to deal with the same record
452 : key appearing in multiple children is not defined. So, for now,
453 : this API is commented out. */
454 :
455 : /*
456 : int
457 : fd_funk_txn_merge_all_children( fd_funk_t * funk,
458 : fd_funk_txn_t * parent_txn,
459 : int verbose );
460 : */
461 :
462 : /* Misc */
463 :
464 : /* fd_funk_txn_verify verifies a transaction map. Returns
465 : FD_FUNK_SUCCESS if the transaction map appears intact and
466 : FD_FUNK_ERR_INVAL if not (logs details). Meant to be called as part
467 : of fd_funk_verify. As such, it assumes funk is non-NULL and
468 : fd_funk_{wksp,txn_map} have already been verified to work. */
469 :
470 : int
471 : fd_funk_txn_verify( fd_funk_t * funk );
472 :
473 : FD_PROTOTYPES_END
474 :
475 : #endif /* HEADER_fd_src_funk_fd_funk_txn_h */
|