LCOV - code coverage report
Current view: top level - flamenco/accdb - fd_accdb.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 21 50 42.0 %
Date: 2026-06-30 05:50:37 Functions: 15 660 2.3 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_flamenco_accdb_fd_accdb_h
       2             : #define HEADER_fd_src_flamenco_accdb_fd_accdb_h
       3             : 
       4             : #include "fd_accdb_shmem.h"
       5             : #include "../../util/bits/fd_bits.h"
       6             : 
       7             : /* The accdb is a fork-aware database that can be queried to get the
       8             :    current state of any account as-of a given fork, and to update it. */
       9             : 
      10       11523 : #define FD_ACCDB_ALIGN (128UL)
      11             : 
      12             : /* Well-known file descriptor numbers for the accounts database backing
      13             :    file.  Tiles inherit these from the parent process which dups the
      14             :    accounts file to these fds before fork+exec, so seccomp filters can
      15             :    pin syscalls to a fixed fd.  fd 123462 is reserved by XDP. */
      16             : 
      17           0 : #define FD_ACCDB_FD_RW (123461)
      18           0 : #define FD_ACCDB_FD_RO (123460)
      19             : 
      20             : struct fd_accdb_private;
      21             : typedef struct fd_accdb_private fd_accdb_t;
      22             : 
      23             : struct fd_accdb_fork_id { ushort val; };
      24             : typedef struct fd_accdb_fork_id fd_accdb_fork_id_t;
      25             : 
      26             : struct fd_accdb_entry {
      27             :   uchar   pubkey[ 32UL ];
      28             :   uchar   owner[ 32UL ];
      29             :   ulong   lamports;
      30             :   int     executable;
      31             : 
      32             :   ulong   data_len;
      33             :   uchar * data;
      34             : 
      35             :   uchar   prior_owner[ 32UL ];
      36             :   ulong   prior_lamports;
      37             :   int     prior_executable;
      38             :   ulong   prior_data_len;
      39             :   uchar * prior_data;
      40             : 
      41             :   int     commit;
      42             : 
      43             :   int     _writable;
      44             :   int     _overwrite;
      45             : 
      46             :   ushort  _fork_id;
      47             :   uint    _generation;
      48             :   ulong   _acc_map_idx;
      49             : 
      50             :   ulong   _original_size_class;
      51             :   ulong   _original_cache_idx;
      52             : 
      53             :   struct {
      54             :     ulong destination_cache_idx[ 8UL ];
      55             :   } _write;
      56             : };
      57             : 
      58             : typedef struct fd_accdb_entry fd_acc_t;
      59             : 
      60             : FD_PROTOTYPES_BEGIN
      61             : 
      62             : #if FD_HAS_INT128
      63             : 
      64             : static inline ulong
      65   483951606 : fd_xxh3_mul128_fold64( ulong lhs, ulong rhs ) {
      66   483951606 :   uint128 product = (uint128)lhs * (uint128)rhs;
      67   483951606 :   return (ulong)product ^ (ulong)( product>>64 );
      68   483951606 : }
      69             : 
      70             : static inline ulong
      71             : fd_xxh3_mix16b( ulong i0, ulong i1,
      72             :                 ulong s0, ulong s1,
      73   483951606 :                 ulong seed ) {
      74   483951606 :   return fd_xxh3_mul128_fold64( i0 ^ (s0 + seed), i1 ^ (s1 - seed) );
      75   483951606 : }
      76             : 
      77             : FD_FN_PURE static inline ulong
      78             : fd_accdb_hash( uchar const key[ 32 ],
      79   241975803 :                ulong       seed ) {
      80   241975803 :   ulong k0 = FD_LOAD( ulong, key+ 0 );
      81   241975803 :   ulong k1 = FD_LOAD( ulong, key+ 8 );
      82   241975803 :   ulong k2 = FD_LOAD( ulong, key+16 );
      83   241975803 :   ulong k3 = FD_LOAD( ulong, key+24 );
      84   241975803 :   ulong acc = 32 * 0x9E3779B185EBCA87ULL;
      85   241975803 :   acc += fd_xxh3_mix16b( k0, k1, 0xbe4ba423396cfeb8UL, 0x1cad21f72c81017cUL, seed );
      86   241975803 :   acc += fd_xxh3_mix16b( k2, k3, 0xdb979083e96dd4deUL, 0x1f67b3b7a4a44072UL, seed );
      87   241975803 :   acc = acc ^ (acc >> 37);
      88   241975803 :   acc *= 0x165667919E3779F9ULL;
      89   241975803 :   acc = acc ^ (acc >> 32);
      90   241975803 :   return acc;
      91   241975803 : }
      92             : 
      93             : #else
      94             : 
      95             : /* If the target lacks uint128 support (!FD_HAS_INT128), the xxHash3
      96             :    variant is unavailable; fall back to the 'old' key hash function.
      97             : 
      98             :    FIXME This version is vulnerable to HashDoS */
      99             : 
     100             : FD_FN_PURE static inline ulong
     101             : fd_accdb_hash( uchar const key[ 32 ],
     102             :                ulong       seed ) {
     103             :   /* tons of ILP */
     104             :   return (fd_ulong_hash( seed ^ (1UL<<0) ^ FD_LOAD( ulong, key+ 0 ) )   ^
     105             :           fd_ulong_hash( seed ^ (1UL<<1) ^ FD_LOAD( ulong, key+ 8 ) ) ) ^
     106             :          (fd_ulong_hash( seed ^ (1UL<<2) ^ FD_LOAD( ulong, key+16 ) ) ^
     107             :           fd_ulong_hash( seed ^ (1UL<<3) ^ FD_LOAD( ulong, key+24 ) ) );
     108             : }
     109             : 
     110             : #endif /* FD_HAS_INT128 */
     111             : 
     112             : FD_FN_CONST ulong
     113             : fd_accdb_align( void );
     114             : 
     115             : FD_FN_CONST ulong
     116             : fd_accdb_footprint( ulong max_live_slots );
     117             : 
     118             : /* fd_accdb_new constructs the local joiner state for an accdb writer
     119             :    (or compaction tile).  fd is an O_RDWR fd of the on-disk file.
     120             : 
     121             :    external_epoch_cnt and external_epoch_slots provide a list of
     122             :    additional epoch publish slots to scan during compaction's
     123             :    deferred-free reclamation.  These point at memory owned by other
     124             :    processes (typically the per-tile fseq of read-only consumers like
     125             :    the rpc tile), mapped read-only into this joiner's address space.
     126             :    Each *external_epoch_slots[i] is updated by the owning RO joiner
     127             :    on each epoch-protected operation (and reset to ULONG_MAX when
     128             :    idle), and is used by this joiner's compaction scan to determine
     129             :    when on-disk partitions can be safely reclaimed.
     130             : 
     131             :    For joiners that do not need to track external RO consumers (i.e.
     132             :    any joiner that is not the compaction tile, or a writer-only
     133             :    topology), pass external_epoch_cnt=0 and external_epoch_slots=NULL.
     134             :    The pointer array is borrowed and must remain valid for the
     135             :    lifetime of the join. */
     136             : 
     137             : void *
     138             : fd_accdb_new( void *              ljoin,
     139             :               fd_accdb_shmem_t *  shmem,
     140             :               int                 fd,
     141             :               ulong               external_epoch_cnt,
     142             :               ulong const **      external_epoch_slots );
     143             : 
     144             : fd_accdb_t *
     145             : fd_accdb_join( void * shaccdb );
     146             : 
     147             : /* fd_accdb_join_readonly is the read-only counterpart of fd_accdb_new +
     148             :    fd_accdb_join.  shmem_ro may point into a read-only mapping of the
     149             :    shmem region; the function will not write to it.  my_epoch_slot_rw
     150             :    must point at a ulong owned by this joiner that it can write to
     151             :    (typically a private per-tile fseq that the accdb tile maps read-only
     152             :    and passes through external_epoch_slots[] in fd_accdb_new).  fd_ro
     153             :    must be opened O_RDONLY on the same file the writer joiner opened RW.
     154             : 
     155             :    The joiner publishes its current epoch into *my_epoch_slot_rw on
     156             :    entry to each epoch-protected operation (and resets to ULONG_MAX on
     157             :    exit).  The accdb tile's compaction scan observes this slot via its
     158             :    external_epoch_slots[] pointer and defers partition reclamation
     159             :    accordingly, the same way it does for in-shmem joiner_epochs[].
     160             : 
     161             :    Only fd_accdb_read_one_nocache, fd_accdb_exists, and
     162             :    fd_accdb_lamports are supported on a readonly join; any other API is
     163             :    undefined behavior. */
     164             : 
     165             : fd_accdb_t *
     166             : fd_accdb_join_readonly( void *             ljoin,
     167             :                         fd_accdb_shmem_t * shmem_ro,
     168             :                         ulong *            my_epoch_slot_rw,
     169             :                         int                fd_ro );
     170             : 
     171             : /* fd_accdb_snapshot_load_{begin,end} toggle a mode on this writer
     172             :    joiner that causes layer-0 partition handoffs to backfill tiering
     173             :    for older snapshot-loaded partitions.  Specifically, when a new
     174             :    partition P is opened at layer 0, the partition at P-2 is retiered
     175             :    to Warm (layer 1) and the partition at P-3 is retiered to Cold
     176             :    (layer 2).  This compensates for the fact that snapshot-loaded
     177             :    accounts never get a second write and therefore never get promoted
     178             :    by normal compaction-driven tiering.
     179             : 
     180             :    Only the snapin tile is expected to use this.  The flag is
     181             :    per-joiner and is not visible across processes. */
     182             : 
     183             : void
     184             : fd_accdb_snapshot_load_begin( fd_accdb_t * accdb );
     185             : 
     186             : void
     187             : fd_accdb_snapshot_load_end( fd_accdb_t * accdb );
     188             : 
     189             : /* fd_accdb_snapshot_recovery_t captures layer-0 write head metadata.
     190             :    Used by fd_accdb_snapshot_{save,revert}_whead to save and restore
     191             :    accdb state across an incremental snapshot attempt. */
     192             : 
     193             : struct fd_accdb_snapshot_recovery {
     194             :   ulong whead_val;             /* whead[0].val */
     195             :   int   has_partition;         /* has_partition[0] */
     196             :   ulong partition_max;         /* partition_max */
     197             :   ulong disk_current_bytes;    /* disk_current_bytes metric */
     198             :   ulong savepoint_bytes_freed; /* bytes_freed of the save-point partition */
     199             : };
     200             : 
     201             : typedef struct fd_accdb_snapshot_recovery fd_accdb_snapshot_recovery_t;
     202             : 
     203             : /* fd_accdb_snapshot_save_whead captures the current layer-0 write head,
     204             :    partition state, and disk_current_bytes metric into the provided
     205             :    recovery struct.  Also captures the save-point partition's
     206             :    bytes_freed. */
     207             : 
     208             : void
     209             : fd_accdb_snapshot_save_whead( fd_accdb_t *                   accdb,
     210             :                               fd_accdb_snapshot_recovery_t * out );
     211             : 
     212             : /* fd_accdb_snapshot_revert_whead restores the layer-0 write head to a
     213             :    previously saved position.
     214             : 
     215             :    It internally waits for the pending background purge command to
     216             :    complete on T2 before releasing partitions, so the caller does not
     217             :    need to insert a separate wait_cmd barrier.
     218             : 
     219             :    Previously allocated partitions (with indices in the range
     220             :    [saved_partition_max, current partition_max)) are released back
     221             :    to the partition pool.  disk_current_bytes is restored to the saved
     222             :    value rather than computed per-partition, and the save-point
     223             :    partition's bytes_freed and write_offset are reset. */
     224             : 
     225             : void
     226             : fd_accdb_snapshot_revert_whead( fd_accdb_t *                         accdb,
     227             :                                 fd_accdb_snapshot_recovery_t const * recover );
     228             : 
     229             : /* fd_accdb_attach_child allocates a new fork as a child of
     230             :    parent_fork_id and returns the new fork's id.  This must be done
     231             :    any time a new fork is being inserted into the accounts database,
     232             :    so that the accounts database can maintain ancestry information
     233             :    in order to support queries correctly.
     234             : 
     235             :    To create the initial root fork, pass a sentinel value with
     236             :    val==USHORT_MAX as parent_fork_id.  This must be done exactly
     237             :    once, before any other fork operations.
     238             : 
     239             :    For non-root forks, parent_fork_id must refer to a fork that has
     240             :    already been attached.  The ancestry must form a tree and it is
     241             :    undefined behavior to create cycles. */
     242             : 
     243             : fd_accdb_fork_id_t
     244             : fd_accdb_attach_child( fd_accdb_t *       accdb,
     245             :                        fd_accdb_fork_id_t parent_fork_id );
     246             : 
     247             : /* fd_accdb_advance_root advances the root of the accounts database to
     248             :    the given fork_id.  fork_id must be a direct child of the current
     249             :    root (i.e. fork->parent_id equals the current root_fork_id).
     250             : 
     251             :    Any competing sibling forks (and their entire subtrees) are removed.
     252             :    For accounts updated on the newly rooted fork, any older versions on
     253             :    ancestor forks are tombstoned for later compaction.  After this call
     254             :    the old root fork slot is freed and fork_id becomes the new root.
     255             : 
     256             :    IMPORTANT: The caller must guarantee that all outstanding
     257             :    acquire/release pairs on every sibling of fork_id (and their entire
     258             :    subtrees) have completed before calling advance_root.  advance_root
     259             :    implicitly purges those sibling subtrees, which frees their fork pool
     260             :    slots for recycling.
     261             : 
     262             :    Once a fork is rooted, its generation becomes the new
     263             :    root_generation.  Concurrent acquires that observe the new root will
     264             :    use the generation fast path (generation <= root_generation) for all
     265             :    accounts from that fork and its ancestors, bypassing descends_set
     266             :    entirely.  This is what makes fork pool slot recycling safe: by the
     267             :    time a slot is freed and reusable, no reader will ever consult
     268             :    descends_set for the old fork_id. */
     269             : 
     270             : void
     271             : fd_accdb_advance_root( fd_accdb_t *       accdb,
     272             :                        fd_accdb_fork_id_t fork_id );
     273             : 
     274             : /* fd_accdb_purge removes the provided fork and all of its descendants
     275             :    from the accounts database.  This is an extremely rare operation,
     276             :    used to handle cases where a leader equivocated and produced two
     277             :    competing blocks for the same slot.
     278             : 
     279             :    All accounts written on the purged fork and any child or
     280             :    grandchild forks are removed from the index, and their disk
     281             :    space is freed for compaction.  The ancestry information for all
     282             :    purged forks is also removed.
     283             : 
     284             :    IMPORTANT: The caller must guarantee that all outstanding
     285             :    acquire/release pairs on the purged fork and every descendant
     286             :    have completed before calling purge.  The same fork pool slot
     287             :    recycling hazard described for advance_root applies here. */
     288             : 
     289             : void
     290             : fd_accdb_purge( fd_accdb_t *       accdb,
     291             :                 fd_accdb_fork_id_t fork_id );
     292             : 
     293             : /* fd_accdb_acquire brings all of the requested accounts as-of the given
      294             :    fork_id into the cache, and refcnts them in the cache so they cannot
     295             :    be evicted until later released.
     296             : 
      297             :    fork_id is the fork index from replay to query as-of, and must exist
      298             :    for the entire duration of the acquire call, meaning, whoever is
      299             :    acquiring must have a refcnt on the bank corresponding to fork_id,
     300             :    and not release it until after the accounts are acquired.  It is safe
     301             :    to release the bank after the acquire call returns, and this will not
     302             :    cause the acquired accounts to be evicted from the cache.
     303             : 
     304             :    pubkeys_cnt is the number of accounts to acquire, and pubkeys is an
     305             :    array of pointers to the 32-byte pubkeys of the accounts to acquire.
     306             :    writable is an array of flags indicating whether each corresponding
     307             :    account in pubkeys is being acquired for read (0) or write (1).
     308             :    Writes provide a temporary buffer of 10MiB in all cases, which the
     309             :    caller can use for staging changes to the data, and this allows
     310             :    account resizing, or cancelling of any data written (for example if a
     311             :    transaction fails) without needing to restore it.  If an account is
     312             :    acquired for write, the caller must set the commit bit on the acc
     313             :    to non-zero to have the changes written back to the database on
     314             :    release, or leave it at zero to discard the changes.  The commit bit
     315             :    must be set even if only the metadata has changed.
     316             : 
     317             :    IMPORTANT: The caller must guarantee that for any given (pubkey,
     318             :    fork) pair, there is no concurrent acquire that holds a writable
     319             :    acc while another acquire for the same account on the same fork is
     320             :    outstanding (whether readable or writable).  Specifically:
     321             : 
     322             :      - Multiple concurrent read-only acquires of the same account on the
     323             :        same fork are permitted.
     324             :      - A writable acquire of an account on a given fork must not overlap
     325             :        with any other acquire (read or write) of that same account on
     326             :        that same fork.
     327             :      - Acquires of the same account on _different_ forks are always safe
     328             :        and may overlap freely, provided that all releases on an ancestor
     329             :        fork have completed before any acquire on a descendant fork
     330             :        begins.  In particular, a fork must finish all of its transaction
     331             :        execution (including committing or cancelling every writable
     332             :        account) before a child fork is attached and begins acquiring.
     333             :        This is naturally guaranteed by the replay scheduler, which does
     334             :        not activate a child block until the parent block is fully done.
     335             :        Concurrent acquires across unrelated sibling forks have no
     336             :        ordering requirement.
     337             : 
     338             :    Violating this contract is undefined behavior and will likely crash
     339             :    with an assertion failure inside the cache refcount logic.  In
     340             :    practice, these constraints are naturally satisfied by the Solana
     341             :    execution model: each transaction has exclusive write locks on its
     342             :    writable accounts within a slot, the scheduler ensures no two
     343             :    concurrent transactions write to the same account on the same fork,
     344             :    and the replay scheduler serializes parent block completion before
     345             :    child block activation on the same fork chain.
     346             : 
     347             :    When a writable account is committed as an "overwrite" (same
     348             :    fork), the acc pool element's metadata fields (size, lamports,
     349             :    offset) are mutated in place, and the cache line's owner field is
     350             :    updated.  This is safe because these mutations
     351             :    only happen on the acc element whose generation matches the
     352             :    committing fork.  A concurrent acquire on a different fork cannot
      353             :    observe an in-place mutation of the same acc element: for a child fork
     354             :    to even exist, the parent must be frozen and no longer undergoing
     355             :    modifications.  All acc pool fields are effectively immutable from
     356             :    the perspective of any concurrent cross-fork reader.
     357             : 
     358             :    out_accs is an array of pubkeys_cnt cache accs to be filled in
     359             :    with the acquired accounts.  The cache will fill the owner, lamports,
     360             :    data_len, and data fields of each acc if the acquire is successful,
     361             :    and the account exists.  If the account does not exist, the lamports
     362             :    field will be set to zero and other fields are undefined. */
     363             : 
     364             : void
     365             : fd_accdb_acquire( fd_accdb_t *          accdb,
     366             :                   fd_accdb_fork_id_t    fork_id,
     367             :                   ulong                 pubkeys_cnt,
     368             :                   uchar const * const * pubkeys,
     369             :                   int *                 writable,
     370             :                   fd_acc_t *            out_accs );
     371             : 
     372             : void
     373             : fd_accdb_acquire_a( fd_accdb_t *          accdb,
     374             :                     fd_accdb_fork_id_t    fork_id,
     375             :                     ulong                 pubkeys_cnt,
     376             :                     uchar const * const * pubkeys,
     377             :                     int *                 writable,
     378             :                     fd_acc_t *            out_accs );
     379             : 
     380             : void
     381             : fd_accdb_acquire_b( fd_accdb_t *          accdb,
     382             :                     fd_accdb_fork_id_t    fork_id,
     383             :                     ulong                 reserved_cnt,
     384             :                     ulong                 pubkeys_cnt,
     385             :                     uchar const * const * pubkeys,
     386             :                     int *                 writable,
     387             :                     fd_acc_t *            out_accs );
     388             : 
     389             : /* fd_accdb_release releases previously acquired accounts back to the
     390             :    cache, and if any of the released writable accounts have their commit
     391             :    bit set, the cache will write the changes back to the database.  The
     392             :    caller must guarantee that the accs being released were previously
     393             :    acquired and not yet released, and that the pubkeys in the accs
     394             :    match the pubkeys of the acquired accounts.  The accs need not be
     395             :    a specific set that was acquired together, although this is
     396             :    recommended.  The fork that each acc refers to must still exist
     397             :    (not yet purged or advanced past) at the time of release.  This
     398             :    includes forks that would be implicitly purged by a concurrent
     399             :    advance_root on a sibling — the caller must ensure advance_root
     400             :    is not called until all releases on affected forks have completed.
     401             :    Releasing accounts for a fork that has been purged or recycled is
     402             :    undefined behavior. */
     403             : 
     404             : void
     405             : fd_accdb_release( fd_accdb_t * accdb,
     406             :                   ulong        accs_cnt,
     407             :                   fd_acc_t *   accs );
     408             : 
     409             : void
     410             : fd_accdb_release_ab( fd_accdb_t * accdb,
     411             :                      ulong        accs_cnt,
     412             :                      fd_acc_t *   accs,
     413             :                      ulong        execs_cnt,
     414             :                      fd_acc_t *   execs );
     415             : 
     416             : fd_acc_t
     417             : fd_accdb_read_one( fd_accdb_t *       accdb,
     418             :                    fd_accdb_fork_id_t fork_id,
     419             :                    uchar const *      pubkey );
     420             : 
     421             : fd_acc_t
     422             : fd_accdb_write_one( fd_accdb_t *       accdb,
     423             :                     fd_accdb_fork_id_t fork_id,
     424             :                     uchar const *      pubkey );
     425             : 
     426             : void
     427             : fd_accdb_unwrite_one( fd_accdb_t * accdb,
     428             :                       fd_acc_t *   acc );
     429             : 
     430             : void
     431             : fd_accdb_unread_one( fd_accdb_t * accdb,
     432             :                      fd_acc_t *   acc );
     433             : 
     434             : int
     435             : fd_accdb_exists( fd_accdb_t *       accdb,
     436             :                  fd_accdb_fork_id_t fork_id,
     437             :                  uchar const *      pubkey );
     438             : 
     439             : /* fd_accdb_read_one_nocache reads one account at fork_id into
     440             :    caller-provided output buffers.  Suitable for processes that mmap the
     441             :    accdb data region read-only: it never mutates any cache line, index
     442             :    entry, or record.  The only write it makes into accdb shmem is
     443             :    publishing this joiner's epoch (to hold off compaction for the
     444             :    duration of the read), and that is done through a separately-mmap'd
     445             :    writable page aliasing the joiner's own epoch slot, not the read-only
     446             :    region.
     447             : 
     448             :    out_owner must point at a 32-byte buffer.  out_data must point at a
     449             :    buffer of at least FD_RUNTIME_ACC_SZ_MAX (10 MiB) bytes, the maximum
     450             :    account data size; the function does not bound-check against the
     451             :    account's actual length.  On a cache hit the bytes are memcpy'd from
     452             :    the cache slot using a try-read-test (ABA) loop; on a miss the owner
     453             :    and data are preadv2'd from the disk fd passed at join time, scattered
     454             :    into out_owner and out_data via iovec (looping on short reads).
     455             : 
     456             :    If the account does not exist, *out_lamports is set to zero and the
     457             :    other outputs are undefined; otherwise *out_lamports is non-zero and
     458             :    out_executable, out_owner, out_data, and out_data_len are all filled
     459             :    in.
     460             : 
     461             :    The function takes no reference; nothing needs to be released. */
     462             : 
     463             : void
     464             : fd_accdb_read_one_nocache( fd_accdb_t *       accdb,
     465             :                            fd_accdb_fork_id_t fork_id,
     466             :                            uchar const *      pubkey,
     467             :                            ulong *            out_lamports,
     468             :                            int *              out_executable,
     469             :                            uchar *            out_owner,
     470             :                            uchar *            out_data,
     471             :                            ulong *            out_data_len );
     472             : 
     473             : /* fd_accdb_lamports returns the lamports of the account at fork_id, or
     474             :    zero if the account does not exist. */
     475             : 
     476             : ulong
     477             : fd_accdb_lamports( fd_accdb_t *       accdb,
     478             :                    fd_accdb_fork_id_t fork_id,
     479             :                    uchar const *      pubkey );
     480             : 
     481             : /* fd_accdb_reset reinitializes the accdb to the state immediately after
     482             :    fd_accdb_new.  All in-memory index state is cleared and all pool
     483             :    joins are re-established.  This does not truncate the on-disk file;
     484             :    the caller must do that separately (e.g. via the snapwr tile).
     485             : 
     486             :    The caller must guarantee that no other thread is concurrently
     487             :    accessing the accdb (no outstanding acquires, no background work). */
     488             : 
     489             : void
     490             : fd_accdb_reset( fd_accdb_t * accdb );
     491             : 
     492             : /* fd_accdb_snapshot_write_one inserts or replaces an account during
     493             :    snapshot loading.  Returns -1 if the write was ignored (an existing
     494             :    acc has a higher slot), 1 if a new acc was inserted, 2 if an
     495             :    existing acc was replaced.  When 2 is returned, *out_replaced_lamports
     496             :    is set to the lamports of the replaced acc.  Otherwise it is set to
     497             :    0.  out_replaced_lamports must be non-NULL.
     498             : 
     499             :    slot must be <= UINT_MAX.  The slot is held in a 32-bit scratch field
     500             :    during snapshot loading; the accdb format must be widened before
     501             :    Solana reaches slot 2^32.  Passing a larger slot crashes the
     502             :    process.
     503             : 
     504             :    fork_id controls recovery behavior (selected by fork_id.val):
     505             : 
     506             :      USHORT_MAX, full-snapshot mode.  Existing entries with the same
     507             :                  pubkey are replaced in-place.  No txn entries are
     508             :                  created.
     509             : 
     510             :      other,      incremental-snapshot mode.  Cross-snapshot overrides
     511             :                  (existing entry from a different fork) insert a NEW
     512             :                  acc_pool entry alongside the old one and create a txn
     513             :                  record on fork_id, so fd_accdb_purge can revert the
     514             :                  incremental writes on failure.  Intra-fork duplicates
     515             :                  (same pubkey from the same fork) are still replaced
     516             :                  in-place. */
     517             : 
     518             : int
     519             : fd_accdb_snapshot_write_one( fd_accdb_t *       accdb,
     520             :                              fd_accdb_fork_id_t fork_id,
     521             :                              uchar const *      pubkey,
     522             :                              ulong              slot,
     523             :                              ulong              lamports,
     524             :                              ulong              data_len,
     525             :                              int                executable,
     526             :                              ulong *            out_replaced_lamports );
     527             : 
     528             : /* fd_accdb_snapshot_write_batch processes up to 8 accounts at once,
     529             :    using software prefetching to overlap hash chain memory latency with
     530             :    useful work.  Each pubkeys[i] points to a 32-byte public key.
     531             :    *out_replaced_lamports is set to the sum of the lamports of all
     532             :    accounts replaced by this batch (i.e. the previous lamports value of
     533             :    each account whose acc was overwritten).  *out_ignored_lamports is
     534             :    set to the sum of the lamports of all accounts ignored by this batch
     535             :    (i.e. the lamports of each input account whose write was dropped
     536             :    because an acc with a higher slot already exists).  Returns 0 on
     537             :    success, -1 if the batch contained two entries with the same pubkey
     538             :    (a corrupt-snapshot signal — the caller should flag the snapshot
     539             :    malformed).  Output counters are not meaningful when -1 is returned.
                     : 
                     :    NOTE(review): cnt, accounts_ignored, accounts_replaced and
                     :    accounts_loaded are not described above.  Presumably cnt is the
                     :    number of valid entries in each input array (at most 8) and the
                     :    accounts_* outputs are per-outcome account counts; confirm against
                     :    the implementation, including whether they are set or accumulated.
     540             : 
     541             :    Each slots[i] must be <= UINT_MAX (see fd_accdb_snapshot_write_one
     542             :    for the rationale).  Passing a larger slot crashes the process.
     543             : 
     544             :    fork_id has the same semantics as in fd_accdb_snapshot_write_one:
     545             :    USHORT_MAX for full-snapshot mode, otherwise incremental mode with
     546             :    txn tracking on the specified fork. */
     547             : 
     548             : int
     549             : fd_accdb_snapshot_write_batch( fd_accdb_t *        accdb,
     550             :                                fd_accdb_fork_id_t  fork_id,
     551             :                                ulong               cnt,
     552             :                                uchar const * const pubkeys[],
     553             :                                ulong  const        slots[],
     554             :                                ulong  const        lamports[],
     555             :                                ulong  const        data_lens[],
     556             :                                int    const        executables[],
     557             :                                ulong *             accounts_ignored,
     558             :                                ulong *             accounts_replaced,
     559             :                                ulong *             accounts_loaded,
     560             :                                ulong *             out_replaced_lamports,
     561             :                                ulong *             out_ignored_lamports );
     562             : 
     563             : /* fd_accdb_background performs one unit of background work.
     564             : 
     565             :    THREADING MODEL
     566             : 
     567             :    The accdb API is split across three thread roles:
     568             : 
     569             :      T1 (replay): calls attach_child, advance_root, purge, acquire, and
     570             :          release.  attach_child runs inline on T1.  advance_root and
     571             :          purge submit a command into a shared-memory slot and return
     572             :          immediately; the heavy work is deferred to T2.
     573             : 
     574             :      T2 (accdb tile / background): calls fd_accdb_background repeatedly.
     575             :          This is the only function T2 should call.
     576             : 
     577             :      T3 (executor tiles, 1..N): call acquire and release.
     578             : 
     579             :    acquire and release may be called concurrently from T1 and any number
     580             :    of T3 threads.  They must never be called concurrently with
     581             :    advance_root or purge on the same fork.
     582             : 
     583             :    fd_accdb_background must be called from exactly one thread (T2).  It
     584             :    must not be called concurrently with itself.
     585             : 
     586             :    BEHAVIOR
     587             : 
     588             :    First checks for a pending advance_root or purge command from T1; if
     589             :    one is present it executes the command, sets *charge_busy to 1, and
     590             :    returns immediately without doing compaction. Otherwise, attempts one
     591             :    step of compaction at each layer, setting *charge_busy if work was
     592             :    done. */
     593             : 
     594             : void
     595             : fd_accdb_background( fd_accdb_t * accdb,
     596             :                      int *        charge_busy );
     597             : 
     598             : /* fd_accdb_shmetrics returns a read-only pointer to the shared metrics
     599             :    counters for the given accdb instance.  The returned pointer remains
     600             :    valid for the lifetime of the underlying shmem. */
     601             : 
     602             : fd_accdb_shmem_metrics_t const *
     603             : fd_accdb_shmetrics( fd_accdb_t * accdb );
     604             : 
     605             : /* fd_accdb_metrics returns a read-only pointer to the per-thread
     606             :    metrics counters for the given accdb instance.  The returned pointer
     607             :    remains valid for the lifetime of the underlying shmem. */
     608             : 
     609             : fd_accdb_metrics_t const *
     610             : fd_accdb_metrics( fd_accdb_t * accdb );
     611             : 
     612             : /* fd_accdb_cache_class_occupancy snapshots the current per-size-class
     613             :    cache occupancy and capacity into the caller-provided arrays, each
     614             :    of which must have FD_ACCDB_CACHE_CLASS_CNT entries.  used[c] is the
     615             :    number of slots in class c that currently hold a cache acc (i.e.
     616             :    slots that have been allocated lazily and are not sitting in the
     617             :    free list).  max[c] is the total slot capacity of class c.  Reads
     618             :    are done with relaxed (volatile) loads and may be momentarily
     619             :    inconsistent with each other under contention.
                     : 
                     :    NOTE(review): reserved is not described here; presumably it is a
                     :    third FD_ACCDB_CACHE_CLASS_CNT-entry output array -- confirm its
                     :    meaning against the implementation. */
     620             : 
     621             : void
     622             : fd_accdb_cache_class_occupancy( fd_accdb_t * accdb,
     623             :                                 ulong *      used,
     624             :                                 ulong *      max,
     625             :                                 ulong *      reserved );
     626             : 
     627             : /* fd_accdb_cache_class_thresholds returns the per-size-class preeviction
     628             :    thresholds as used-slot counts (directly comparable to the used[] and
     629             :    max[] outputs of fd_accdb_cache_class_occupancy).  Each output array
     630             :    must have FD_ACCDB_CACHE_CLASS_CNT entries.  target_used[c] is the
     631             :    used count the background preevict pass tries to drive towards (max -
     632             :    cache_free_target).  low_water_used[c] is the used count at which the
     633             :    preevict pass starts firing (max - cache_free_low_water).  Both are
     634             :    set once at init and are stable for the lifetime of the cache. */
     635             : 
     636             : void
     637             : fd_accdb_cache_class_thresholds( fd_accdb_t * accdb,
     638             :                                  ulong *      target_used,
     639             :                                  ulong *      low_water_used );
     640             : 
     641             : /* FD_ACCDB_METRICS_WRITE( TILE, m ) publishes the per-joiner accdb
     642             :    runtime counters pointed to by m (a fd_accdb_metrics_t const *)
     643             :    under tile prefix TILE.  TILE must name a tile that declares the
     644             :    AccdbAccountAcquired/... counters in metrics.xml (e.g. EXECLE,
     645             :    EXECRP, REPLAY, TOWER, ACCDB).  m is evaluated exactly once. */
     646             : 
     647           0 : #define FD_ACCDB_METRICS_WRITE( TILE, m ) do {                                                               \
     648           0 :     fd_accdb_metrics_t const * _m = (m);                                                                      \
     649           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_ACQUIRED,            _m->accounts_acquired_per_class            ); \
     650           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_WRITABLE_ACQUIRED,   _m->writable_accounts_acquired_per_class   ); \
     651           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_EVICTED,             _m->accounts_evicted_per_class             ); \
     652           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_COMMITTED_NEW,       _m->accounts_committed_new_per_class       ); \
     653           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_COMMITTED_OVERWRITE, _m->accounts_committed_overwrite_per_class ); \
     654           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_NOT_FOUND,           _m->accounts_not_found_per_class           ); \
     655           0 :     FD_MCNT_SET( TILE, ACCDB_ACCOUNT_WAITED,  _m->accounts_waited );                                          \
     656           0 :     FD_MCNT_SET( TILE, ACCDB_BATCH_ACQUIRED,  _m->acquire_calls   );                                          \
     657           0 :     FD_MCNT_SET( TILE, ACCDB_ACQUIRE_FAILED,  _m->acquire_failed  );                                          \
     658           0 :     FD_MCNT_SET( TILE, ACCDB_BYTES_READ,      _m->bytes_read      );                                          \
     659           0 :     FD_MCNT_SET( TILE, ACCDB_READ_OPERATION,  _m->read_ops        );                                          \
     660           0 :     FD_MCNT_SET( TILE, ACCDB_BYTES_WRITTEN,   _m->bytes_written   );                                          \
     661           0 :     FD_MCNT_SET( TILE, ACCDB_WRITE_OPERATION, _m->write_ops       );                                          \
     662           0 :     FD_MCNT_SET( TILE, ACCDB_BYTES_COPIED,    _m->bytes_copied    );                                          \
     663           0 :   } while(0)
     664             : 
     665             : /* FD_ACCDB_METRICS_WRITE_RO( TILE, m ) is the variant of
     666             :    FD_ACCDB_METRICS_WRITE for read-only joiners (e.g. RPC).  It
     667             :    publishes only the counters fd_accdb_read_one_nocache touches, which
     668             :    are the only accdb counters such tiles declare in metrics.xml. */
     669             : 
     670           0 : #define FD_ACCDB_METRICS_WRITE_RO( TILE, m ) do {                                        \
     671           0 :     fd_accdb_metrics_t const * _m = (m);                                                  \
     672           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_ACQUIRED,  _m->accounts_acquired_per_class  ); \
     673           0 :     FD_MCNT_ENUM_COPY( TILE, ACCDB_ACCOUNT_NOT_FOUND, _m->accounts_not_found_per_class ); \
     674           0 :     FD_MCNT_SET( TILE, ACCDB_ACCOUNT_WAITED, _m->accounts_waited );                       \
     675           0 :     FD_MCNT_SET( TILE, ACCDB_BATCH_ACQUIRED, _m->acquire_calls   );                       \
     676           0 :     FD_MCNT_SET( TILE, ACCDB_BYTES_READ,     _m->bytes_read      );                       \
     677           0 :     FD_MCNT_SET( TILE, ACCDB_READ_OPERATION, _m->read_ops        );                       \
     678           0 :     FD_MCNT_SET( TILE, ACCDB_BYTES_COPIED,   _m->bytes_copied    );                       \
     679           0 :   } while(0)
     680             : 
     681             : FD_PROTOTYPES_END
     682             : 
     683             : #endif /* HEADER_fd_src_flamenco_accdb_fd_accdb_h */

Generated by: LCOV version 1.14