Line data Source code
1 : #ifndef HEADER_fd_src_flamenco_runtime_fd_blockstore_h
2 : #define HEADER_fd_src_flamenco_runtime_fd_blockstore_h
3 :
4 : /* Blockstore is a high-performance database for in-memory indexing and
5 : durably storing blocks.
6 :
7 : `fd_blockstore` defines a number of useful types e.g. `fd_block_t`,
8 : `fd_block_shred`, etc.
9 :
10 : The blockstore alloc is used for allocating wksp resources for shred
11 : headers, microblock headers, and blocks. This is an fd_alloc.
12 : Allocations from this allocator will be tagged with wksp_tag and
13 : operations on this allocator will use concurrency group 0. */
14 :
15 : #include "../../ballet/block/fd_microblock.h"
16 : #include "../../ballet/shred/fd_deshredder.h"
17 : #include "../../ballet/shred/fd_shred.h"
18 : #include "../fd_flamenco_base.h"
19 : #include "../types/fd_types.h"
20 : #include "fd_rwseq_lock.h"
21 : #include "stdbool.h"
22 : #include <fcntl.h>
23 :
24 : /* FD_BLOCKSTORE_ALIGN specifies the alignment needed for blockstore.
25 : ALIGN is double x86 cache line to mitigate various kinds of false
26 : sharing (eg. ACLPF adjacent cache line prefetch). */
27 :
28 0 : #define FD_BLOCKSTORE_ALIGN (128UL)
29 :
30 : /* FD_BLOCKSTORE_MAGIC defines a magic number for verifying the memory
31 : of blockstore is not corrupted. */
32 :
33 0 : #define FD_BLOCKSTORE_MAGIC (0xf17eda2ce7b10c00UL) /* firedancer bloc version 0 */
34 :
35 : /* DO NOT MODIFY. */
36 : // #define FD_BUF_SHRED_MAP_MAX (1UL << 24UL) /* 16 million shreds can be buffered */
37 :
38 : /* TODO this can be removed if we explicitly manage a memory pool for
39 : the fd_block_map_t entries */
40 0 : #define FD_BLOCKSTORE_CHILD_SLOT_MAX (32UL) /* the maximum # of children a slot can have */
41 0 : #define FD_BLOCKSTORE_ARCHIVE_MIN_SIZE (1UL << 26UL) /* 64MB := ceil(MAX_DATA_SHREDS_PER_SLOT*1228) */
42 :
43 : /* FD_SLICE_ALIGN specifies the alignment needed for a block slice.
44 : ALIGN is double x86 cache line to mitigate various kinds of false
45 : sharing (eg. ACLPF adjacent cache line prefetch). */
46 :
47 : #define FD_SLICE_ALIGN (128UL)
48 :
49 : /* FD_SLICE_MAX specifies the maximum size of an entry batch. This is
50 : equivalent to the maximum size of a block (ie. a block with a single
51 : entry batch). */
52 :
53 0 : #define FD_SLICE_MAX (FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT)
54 :
55 : /* FD_SLICE_MAX_WITH_HEADERS specifies the maximum size of all of the
56 : shreds that can be in an entry batch. This is equivalent to max
57 : number of shreds (including header and payload) that can be in a
58 : single slot. */
59 :
60 0 : #define FD_SLICE_MAX_WITH_HEADERS (FD_SHRED_DATA_HEADER_MAX_PER_SLOT + FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT)
61 :
62 : /* 64 ticks per slot, and then one min size transaction per microblock
63 : for all the remaining microblocks.
64 : This bound should be used along with the transaction parser and tick
65 : verifier to enforce the assumptions.
66 : This is NOT a standalone conservative bound against malicious
67 : validators.
68 : A tighter bound could probably be derived if necessary. */
69 :
70 0 : #define FD_MICROBLOCK_MAX_PER_SLOT ((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT - 64UL*sizeof(fd_microblock_hdr_t)) / (sizeof(fd_microblock_hdr_t)+FD_TXN_MIN_SERIALIZED_SZ) + 64UL) /* 200,796 */
71 : /* 64 ticks per slot, and a single gigantic microblock containing min
72 : size transactions. */
73 : #define FD_TXN_MAX_PER_SLOT ((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT - 65UL*sizeof(fd_microblock_hdr_t)) / (FD_TXN_MIN_SERIALIZED_SZ)) /* 272,635 */
74 :
75 : // TODO centralize these
76 : // https://github.com/firedancer-io/solana/blob/v1.17.5/sdk/program/src/clock.rs#L34
77 : #define FD_MS_PER_TICK 6
78 :
79 : // https://github.com/firedancer-io/solana/blob/v1.17.5/core/src/repair/repair_service.rs#L55
80 : #define FD_REPAIR_TIMEOUT (200 / FD_MS_PER_TICK)
81 :
/* Blockstore status / error codes.

   NOTE(review): two overlapping families of codes coexist below.  The
   generic family (SUCCESS, ERR_INVAL .. ERR_KEY) and the legacy
   shred/slot-specific family share numeric values, eg.
   FD_BLOCKSTORE_ERR_SHRED_FULL == FD_BLOCKSTORE_ERR_INVAL == -1 and
   FD_BLOCKSTORE_ERR_SHRED_MISSING == FD_BLOCKSTORE_ERR_EMPTY == -4.
   Callers must not mix families when comparing return values.
   TODO consolidate into a single family. */

#define FD_BLOCKSTORE_SUCCESS 0
#define FD_BLOCKSTORE_SUCCESS_SLOT_COMPLETE 1
#define FD_BLOCKSTORE_ERR_INVAL (-1)
#define FD_BLOCKSTORE_ERR_AGAIN (-2)
#define FD_BLOCKSTORE_ERR_CORRUPT (-3)
#define FD_BLOCKSTORE_ERR_EMPTY (-4)
#define FD_BLOCKSTORE_ERR_FULL (-5)
#define FD_BLOCKSTORE_ERR_KEY (-6)
#define FD_BLOCKSTORE_ERR_SHRED_FULL -1 /* no space left for shreds */
#define FD_BLOCKSTORE_ERR_SLOT_FULL -2 /* no space left for slots */
#define FD_BLOCKSTORE_ERR_SHRED_MISSING -4
#define FD_BLOCKSTORE_ERR_SLOT_MISSING -5
#define FD_BLOCKSTORE_ERR_SHRED_INVALID -7 /* shred was invalid */
#define FD_BLOCKSTORE_ERR_DESHRED_INVALID -8 /* deshredded block was invalid */
#define FD_BLOCKSTORE_ERR_NO_MEM -9 /* no mem */
#define FD_BLOCKSTORE_ERR_UNKNOWN -99
98 :
99 0 : static inline char const * fd_blockstore_strerror( int err ) {
100 0 : switch( err ) {
101 0 : case FD_BLOCKSTORE_SUCCESS: return "success";
102 0 : case FD_BLOCKSTORE_ERR_INVAL: return "bad input";
103 0 : case FD_BLOCKSTORE_ERR_AGAIN: return "try again";
104 0 : case FD_BLOCKSTORE_ERR_CORRUPT: return "corruption detected";
105 0 : case FD_BLOCKSTORE_ERR_EMPTY: return "empty";
106 0 : case FD_BLOCKSTORE_ERR_FULL: return "full";
107 0 : case FD_BLOCKSTORE_ERR_KEY: return "key not found";
108 0 : default: break;
109 0 : }
110 0 : return "unknown";
111 0 : }
112 :
113 : struct fd_shred_key {
114 : ulong slot;
115 : uint idx;
116 : };
117 : typedef struct fd_shred_key fd_shred_key_t;
118 :
119 : static const fd_shred_key_t fd_shred_key_null = { 0 };
120 : #define FD_SHRED_KEY_NULL fd_shred_key_null
121 : #define FD_SHRED_KEY_INVAL(key) (!((key).slot) & !((key).idx))
122 0 : #define FD_SHRED_KEY_EQ(k0,k1) (!(((k0).slot) ^ ((k1).slot))) & !(((k0).idx) ^ (((k1).idx)))
123 0 : #define FD_SHRED_KEY_HASH(key) ((uint)(((key).slot)<<15UL) | (((key).idx))) /* current max shred idx is 32KB = 2 << 15*/
124 :
125 : /* fd_buf_shred is a thin wrapper around fd_shred_t that facilitates
126 : buffering data shreds before all the shreds for a slot have been
127 : received. After all shreds are received, these buffered shreds are
128 : released back into memory pool and future queries for the shreds are
129 : offset into the block data directly.
130 :
131 : The blockstore is only aware of data shreds and all APIs involving
132 : shreds refers to data shreds.
133 :
134 : Shreds are buffered into a map as they are received:
135 :
136 : | 0 | 1 | 2 | x | x | 5 | x |
137 : ^ ^
138 : c r
139 :
140 : c = "consumed" = contiguous window starting from index 0
141 : r = "received" = highest index received so far
142 :
143 : Shred memory layout while stored in the map:
144 :
145 : | shred hdr | shred payload |
146 : */
struct __attribute__((aligned(128UL))) fd_buf_shred {
  fd_shred_key_t key;  /* map key: (slot, shred idx) */
  ulong          prev; /* intrusive link managed by fd_buf_shred_pool (fd_pool_para, instantiated below) */
  ulong          next; /* intrusive link managed by fd_buf_shred_map chaining (fd_map_chain_para, instantiated below) */
  ulong          memo; /* managed by fd_buf_shred_map (presumably a cached key hash -- TODO confirm vs fd_map_chain_para.c) */
  int eqvoc; /* we've seen an equivocating version of this
                shred (same key but different payload). */
  union {
    fd_shred_t hdr; /* shred header */
    uchar buf[FD_SHRED_MIN_SZ]; /* the entire shred buffer, both header and payload. */
  };
};
typedef struct fd_buf_shred fd_buf_shred_t;
160 :
/* fd_buf_shred_pool: element pool backing the buffered shreds
   (fd_pool_para template instantiation). */
#define POOL_NAME fd_buf_shred_pool
#define POOL_ELE_T fd_buf_shred_t
#include "../../util/tmpl/fd_pool_para.c"

/* fd_buf_shred_map: concurrent chained map of (slot, idx) ->
   fd_buf_shred_t (fd_map_chain_para template instantiation).  Key
   equality / hashing reuse the FD_SHRED_KEY_* macros above. */
#define MAP_NAME fd_buf_shred_map
#define MAP_ELE_T fd_buf_shred_t
#define MAP_KEY_T fd_shred_key_t
#define MAP_KEY_EQ(k0,k1) (FD_SHRED_KEY_EQ(*k0,*k1))
#define MAP_KEY_EQ_IS_SLOW 1
#define MAP_KEY_HASH(key,seed) (FD_SHRED_KEY_HASH(*key)^seed)
#include "../../util/tmpl/fd_map_chain_para.c"

/* fd_slot_deque: dynamically-sized deque of slot numbers
   (fd_deque_dynamic template instantiation). */
#define DEQUE_NAME fd_slot_deque
#define DEQUE_T ulong
#include "../../util/tmpl/fd_deque_dynamic.c"
176 :
/* fd_block_shred_t is a shred that has been assembled into a block. The
   shred begins at `off` relative to the start of the block's data
   region. */
struct fd_block_shred {
  fd_shred_t hdr; /* inline copy of the data shred header (stored by value, not a pointer) */
  ulong off; /* offset to the payload relative to the start of the block's data region */
};
typedef struct fd_block_shred fd_block_shred_t;
185 :
186 : /*
187 : * fd_block_entry_batch_t is a microblock/entry batch within a block.
188 : * The offset is relative to the start of the block's data region,
189 : * and indicates where the batch ends. The (exclusive) end offset of
190 : * batch i is the (inclusive) start offset of batch i+1. The 0th batch
191 : * always starts at offset 0.
192 : * On the wire, the presence of one of the COMPLETE flags in a data
193 : * shred marks the end of a batch.
194 : * In other words, batch ends are aligned with shred ends, and batch
195 : * starts are aligned with shred starts. Usually a batch comprises
196 : * multiple shreds, and a block comprises multiple batches.
197 : * This information is useful because bincode deserialization needs to
198 : * be performed on a per-batch basis. Precisely a single array of
199 : * microblocks/entries is expected to be deserialized from a batch.
200 : * Trailing bytes in each batch are ignored by default.
201 : */
struct fd_block_entry_batch {
  ulong end_off; /* exclusive end offset of this batch, relative to the start of the block's data region */
};
typedef struct fd_block_entry_batch fd_block_entry_batch_t;
206 :
207 : /* fd_block_micro_t is a microblock ("entry" in Solana parlance) within
208 : a block. The microblock begins at `off` relative to the start of the
209 : block's data region. */
struct fd_block_micro {
  ulong off; /* offset into the block's data region where this microblock (entry) begins */
};
typedef struct fd_block_micro fd_block_micro_t;
214 :
215 : /* If the 0th bit is set, this indicates the block is preparing, which
216 : means it might be partially executed e.g. a subset of the microblocks
217 : have been executed. It is not safe to remove, relocate, or modify
218 : the block in any way at this time.
219 :
220 : Callers holding a pointer to a block should always make sure to
221 : inspect this flag.
222 :
223 : Other flags mainly provide useful metadata for read-only callers, eg.
224 : RPC. */
225 :
#define FD_BLOCK_FLAG_RECEIVING 0 /* xxxxxxx1 still receiving shreds */
#define FD_BLOCK_FLAG_COMPLETED 1 /* xxxxxx1x received the block ie. all shreds (SLOT_COMPLETE) */
#define FD_BLOCK_FLAG_REPLAYING 2 /* xxxxx1xx replay in progress (DO NOT REMOVE) */
#define FD_BLOCK_FLAG_PROCESSED 3 /* xxxx1xxx successfully replayed the block */
#define FD_BLOCK_FLAG_EQVOCSAFE 4 /* xxx1xxxx 52% of cluster has voted on this (slot, bank hash) */
#define FD_BLOCK_FLAG_CONFIRMED 5 /* xx1xxxxx 2/3 of cluster has voted on this (slot, bank hash) */
#define FD_BLOCK_FLAG_FINALIZED 6 /* x1xxxxxx 2/3 of cluster has rooted this slot */
#define FD_BLOCK_FLAG_DEADBLOCK 7 /* 1xxxxxxx failed to replay the block */
234 :
235 : /* Rewards assigned after block is executed */
236 :
struct fd_block_rewards {
  ulong collected_fees; /* fees collected while executing the block -- TODO confirm units (lamports) */
  fd_hash_t leader;     /* NOTE(review): typed fd_hash_t but named `leader` -- presumably the leader's pubkey bytes, confirm */
  ulong post_balance;   /* balance after the reward is applied -- TODO confirm which account */
};
typedef struct fd_block_rewards fd_block_rewards_t;
243 :
/* NOTE(review): all 8 flag bits [0, 8) are assigned by the
   FD_BLOCK_FLAG_* defines above; no reserved bits remain in the uchar.

   To avoid confusion, please use the `fd_bits.h` API
   ie. `fd_uchar_set_bit`, `fd_uchar_extract_bit`. */
248 :
/* fd_block_set: fixed-size bit set over shred idxs [0, FD_SHRED_BLK_MAX)
   (fd_set template instantiation). */
#define SET_NAME fd_block_set
#define SET_MAX FD_SHRED_BLK_MAX
#include "../../util/tmpl/fd_set.c"
252 :
/* fd_block_info_t is the per-slot metadata entry of the block map:
   fork ancestry, shred-windowing progress, hashes, and the gaddr of
   the assembled block data. */
struct fd_block_info {
  ulong slot; /* map key */
  ulong next; /* reserved for use by the map template (comment previously said
                 fd_map_giant.c, but the instantiation below is
                 fd_map_slot_para -- TODO confirm which template owns this) */

  /* Ancestry */

  ulong parent_slot;
  ulong child_slots[FD_BLOCKSTORE_CHILD_SLOT_MAX];
  ulong child_slot_cnt;

  /* Metadata */

  /* To be banished after offline ledger replay is removed. These fields
     are not used for replay. */
  ulong block_height;
  fd_hash_t block_hash;
  fd_hash_t bank_hash;

  ulong fec_cnt; /* the number of FEC sets in the slot */
  uchar flags;   /* bit vector of FD_BLOCK_FLAG_* (use fd_uchar_set_bit / fd_uchar_extract_bit) */
  long ts;       /* the wallclock time when we finished receiving the block. */

  /* Windowing

     Shreds are buffered into a map as they are received:

     | 0 | 1 | 2 | x | x | 5 | x |
           ^       ^           ^
           c       b           r

     c = "consumed" = contiguous shred idxs that have been consumed.
                      the "consumer" is replay and the idx is
                      incremented after replaying each block slice.
     b = "buffered" = contiguous shred idxs that have been buffered.
                      when buffered == block_slice_end the next slice of
                      a block is ready for replay.
     r = "received" = highest shred idx received so far. used to detect
                      when repair is needed.
  */

  uint consumed_idx; /* the highest shred idx we've contiguously consumed (consecutive from 0). */
  uint buffered_idx; /* the highest shred idx we've contiguously buffered (consecutive from 0). */
  uint received_idx; /* the highest shred idx we've received (can be out-of-order). */

  uint data_complete_idx; /* the highest shred idx wrt contiguous entry batches (inclusive). */
  uint slot_complete_idx; /* the highest shred idx for the entire slot (inclusive). */

  /* This is a bit vec (fd_set) that tracks every shred idx marked with
     FD_SHRED_DATA_FLAG_DATA_COMPLETE. The bit position in the fd_set
     corresponds to the shred's index. Note shreds can be received
     out-of-order so higher bits might be set before lower bits. */

  /* NOTE(review): an fd_set covering FD_SHRED_BLK_MAX bits normally
     needs FD_SHRED_BLK_MAX/64 ulong words; dividing by sizeof(ulong)
     (= 8 bytes, not 64 bits) appears to oversize this array 8x.
     Harmless but wasteful -- confirm against the fd_set.c template
     before changing (layout is shared memory / ABI sensitive). */

  fd_block_set_t data_complete_idxs[FD_SHRED_BLK_MAX / sizeof(ulong)];

  /* Helpers for batching tick verification */

  ulong ticks_consumed;
  ulong tick_hash_count_accum;
  fd_hash_t in_poh_hash; /* TODO: might not be best place to hold this */

  /* Block */

  ulong block_gaddr; /* global address to the start of the allocated fd_block_t */
};
typedef struct fd_block_info fd_block_info_t;
318 :
319 : #define MAP_NAME fd_block_map
320 0 : #define MAP_ELE_T fd_block_info_t
321 0 : #define MAP_KEY slot
322 0 : #define MAP_ELE_IS_FREE(ctx, ele) ((ele)->slot == ULONG_MAX)
323 0 : #define MAP_ELE_FREE(ctx, ele) ((ele)->slot = ULONG_MAX)
324 0 : #define MAP_ELE_MOVE(ctx,dst,src) do { MAP_ELE_T * _src = (src); (*(dst)) = *_src; _src->MAP_KEY = (MAP_KEY_T)ULONG_MAX; } while(0)
325 0 : #define MAP_KEY_HASH(key, seed) (void)(seed), (*(key))
326 : #include "../../util/tmpl/fd_map_slot_para.c"
327 :
328 0 : #define BLOCK_INFO_LOCK_CNT 1024UL
329 : #define BLOCK_INFO_PROBE_CNT 2UL
330 : /*
331 : Rationale for block_map parameters:
332 : - each lock manages block_max / lock_cnt elements, so with block_max
333 : at 4096, each lock would manage 4 contiguous elements.
334 : - Since keys are unique and increment by 1, we can index key to map
335 : bucket by taking key % ele_max directly. This way in theory we
336 : have perfect hashing and never need to probe.
337 : - This breaks when we store more than 4096 contiguous slots,
338 : i.e.: slot 0 collides with slot 4096, but this is at heart an
339 : OOM issue.
340 : - Causes possible contention - consider if we execute n, but are
341 : storing shreds for n+1 -- these are managed by the same lock.
342 : Perhaps opportunity for optimization.
343 : */
344 :
345 : /* fd_block_idx is an in-memory index of finalized blocks that have been
346 : archived to disk. It records the slot together with the byte offset
347 : relative to the start of the file. */
348 :
struct fd_block_idx {
  ulong slot;           /* map key: slot number of the archived block */
  ulong next;           /* reserved for use by fd_map_dynamic (instantiated below) */
  uint hash;            /* reserved for use by fd_map_dynamic -- TODO confirm (memoized key hash?) */
  ulong off;            /* byte offset of the block relative to the start of the archival file */
  fd_hash_t block_hash; /* block (poh) hash of the archived block */
  fd_hash_t bank_hash;  /* bank hash of the archived block */
};
typedef struct fd_block_idx fd_block_idx_t;
358 :
359 : #define MAP_NAME fd_block_idx
360 0 : #define MAP_T fd_block_idx_t
361 0 : #define MAP_KEY slot
362 0 : #define MAP_KEY_HASH(key) ((uint)(key)) /* finalized slots are guaranteed to be unique so perfect hashing */
363 0 : #define MAP_KEY_INVAL(k) (k == ULONG_MAX)
364 : #include "../../util/tmpl/fd_map_dynamic.c"
365 :
366 : /* fd_blockstore_archiver outlines the format of metadata
367 : at the start of an archive file - needed so that archive
368 : files can be read back on initialization. */
369 :
struct fd_blockstore_archiver {
  ulong fd_size_max; /* maximum size (bytes) of the archival file */
  ulong num_blocks;  /* number of blocks in the archival file. needed for reading back */
  ulong head;        /* location of least recently written block (file byte offset -- TODO confirm) */
  ulong tail;        /* location after most recently written block (file byte offset -- TODO confirm) */
};
typedef struct fd_blockstore_archiver fd_blockstore_archiver_t;
/* Archived blocks start immediately after this metadata header. */
#define FD_BLOCKSTORE_ARCHIVE_START sizeof(fd_blockstore_archiver_t)
378 :
379 : /* CONCURRENCY NOTES FOR BLOCKSTORE ENJOINERS:
380 :
381 : With the parallelization of the shred map and block map, parts of the
382 : blockstore are concurrent, and parts are not. Block map and shred map
383 : have their own locks, which are managed through the
384 : query_try/query_test APIs. When accessing buf_shred_t and
385 : block_info_t items then, the caller does not need to use
386 : blockstore_start/end_read/write. However, the
387 : blockstore_start/end_read/write still protects the blockstore_shmem_t
388 : object. If you are reading and writing any blockstore_shmem fields
389 : and at the same time accessing the block_info_t or buf_shred_t, you
390 : should call both the blockstore_start/end_read/write APIs AND the map
391 : query_try/test APIs. These are locks of separate concerns and will
392 : not deadlock with each other. TODO update docs when we switch to
393 : fenced read/write for primitive fields in shmem_t. */
struct __attribute__((aligned(FD_BLOCKSTORE_ALIGN))) fd_blockstore_shmem {

  /* Metadata */

  ulong magic;            /* == FD_BLOCKSTORE_MAGIC when this region holds a formatted blockstore */
  ulong blockstore_gaddr; /* wksp gaddr of this shmem region; a join recovers the backing wksp from it (see fd_blockstore_wksp) */
  ulong wksp_tag;         /* tag used for this blockstore's wksp allocations */
  ulong seed;             /* seed for the blockstore's hash functions */

  /* Persistence */

  fd_blockstore_archiver_t archiver;
  ulong mrw_slot; /* most recently written slot */

  /* Slot metadata */

  ulong lps; /* latest processed slot */
  ulong hcs; /* highest confirmed slot */
  ulong wmk; /* watermark. DO NOT MODIFY DIRECTLY. */

  /* Config limits */

  ulong shred_max; /* maximum # of shreds that can be held in memory */
  ulong block_max; /* maximum # of blocks that can be held in memory */
  ulong idx_max;   /* maximum # of blocks that can be indexed from the archival file */
  ulong alloc_max; /* maximum bytes that can be allocated */

  //ulong block_map_gaddr; /* map of slot->(slot_meta, block) */
  ulong block_idx_gaddr;  /* map of slot->byte offset in archival file */
  ulong slot_deque_gaddr; /* deque of slot numbers */

  ulong alloc_gaddr; /* gaddr of the blockstore's fd_alloc (see fd_blockstore_alloc) */
};
typedef struct fd_blockstore_shmem fd_blockstore_shmem_t;
428 :
/* fd_blockstore_t is a local join to the blockstore. This is specific
   to the local address space and should not be shared across tiles. */
431 :
struct fd_blockstore {

  /* shared memory region */

  fd_blockstore_shmem_t * shmem; /* read/writes to shmem must call fd_blockstore_start_read()*/

  /* local join handles */

  fd_buf_shred_pool_t shred_pool[1]; /* local join to the buffered shred pool */
  fd_buf_shred_map_t shred_map[1];   /* local join to the buffered shred map */
  fd_block_map_t block_map[1];       /* local join to the block map (slot -> fd_block_info_t) */
};
typedef struct fd_blockstore fd_blockstore_t;
445 :
446 : FD_PROTOTYPES_BEGIN
447 :
448 : /* Construction API */
449 :
/* fd_blockstore_align returns FD_BLOCKSTORE_ALIGN, the alignment
   required of the shared memory region backing a blockstore. */
FD_FN_CONST static inline ulong
fd_blockstore_align( void ) {
  return FD_BLOCKSTORE_ALIGN;
}
454 :
455 : /* fd_blockstore_footprint returns the footprint of the entire
456 : blockstore shared memory region occupied by `fd_blockstore_shmem_t`
457 : including data structures. */
458 :
FD_FN_CONST static inline ulong
fd_blockstore_footprint( ulong shred_max, ulong block_max, ulong idx_max ) {
  /* TODO -- when removing, make change in fd_blockstore_new as well */
  block_max = fd_ulong_pow2_up( block_max ); /* map bucket count must be a power of two */
  ulong lock_cnt = fd_ulong_min( block_max, BLOCK_INFO_LOCK_CNT );

  /* lg of the block idx table size (fd_block_idx is an fd_map_dynamic,
     whose footprint is parameterized by lg of the slot count) */
  int lg_idx_max = fd_ulong_find_msb( fd_ulong_pow2_up( idx_max ) );
  /* NOTE(review): the APPEND order below (shmem hdr, shred elements,
     shred pool, shred map, block infos, block map, block idx, slot
     deque, alloc) must stay in sync with the layout performed by
     fd_blockstore_new. */
  return FD_LAYOUT_FINI(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_APPEND(
    FD_LAYOUT_INIT,
      alignof(fd_blockstore_shmem_t), sizeof(fd_blockstore_shmem_t) ),
      alignof(fd_buf_shred_t), sizeof(fd_buf_shred_t) * shred_max ),
      fd_buf_shred_pool_align(), fd_buf_shred_pool_footprint() ),
      fd_buf_shred_map_align(), fd_buf_shred_map_footprint( shred_max ) ),
      alignof(fd_block_info_t), sizeof(fd_block_info_t) * block_max ),
      fd_block_map_align(), fd_block_map_footprint( block_max, lock_cnt, BLOCK_INFO_PROBE_CNT ) ),
      fd_block_idx_align(), fd_block_idx_footprint( lg_idx_max ) ),
      fd_slot_deque_align(), fd_slot_deque_footprint( block_max ) ),
      fd_alloc_align(), fd_alloc_footprint() ),
    fd_blockstore_align() );
}
488 :
489 : /* fd_blockstore_new formats a memory region with the appropriate
490 : alignment and footprint into a blockstore. shmem points in the
491 : caller's address space of the memory region to format. Returns shmem
492 : on success (blockstore has ownership of the memory region) and NULL
493 : on failure (no changes, logs details). Caller is not joined on
494 : return. The blockstore will be empty and unlocked. */
495 :
496 : void *
497 : fd_blockstore_new( void * shmem,
498 : ulong wksp_tag,
499 : ulong seed,
500 : ulong shred_max,
501 : ulong block_max,
502 : ulong idx_max );
503 :
504 : /* fd_blockstore_join joins a blockstore. ljoin points to a
505 : fd_blockstore_t compatible memory region in the caller's address
506 : space used to hold info about the local join, shblockstore points in
507 : the caller's address space to the memory region containing the
508 : blockstore. Returns a handle to the caller's local join on success
509 : (join has ownership of the ljoin region) and NULL on failure (no
510 : changes, logs details). */
511 :
512 : fd_blockstore_t *
513 : fd_blockstore_join( void * ljoin, void * shblockstore );
514 :
515 : void *
516 : fd_blockstore_leave( fd_blockstore_t * blockstore );
517 :
518 : void *
519 : fd_blockstore_delete( void * shblockstore );
520 :
521 : /* fd_blockstore_init initializes a blockstore with the given
522 : `slot_bank`. This bank is used for initializing fields (SMR, etc.),
523 : and should be the bank upon finishing a snapshot load if booting from
524 : a snapshot, genesis bank otherwise. It is also used to "fake" the
525 : snapshot block as if that block's data were available. The metadata
526 : for this block's slot will be populated (fd_block_map_t) but the
527 : actual block data (fd_block_t) won't exist. This is done to bootstrap
528 : the various components for live replay (turbine, repair, etc.)
529 :
530 : `fd` is a file descriptor for the blockstore archival file. As part
531 : of `init`, blockstore rebuilds an in-memory index of the archival
532 : file. */
533 :
534 : fd_blockstore_t *
535 : fd_blockstore_init( fd_blockstore_t * blockstore,
536 : int fd,
537 : ulong fd_size_max,
538 : ulong slot );
539 :
540 : /* fd_blockstore_fini finalizes a blockstore.
541 :
542 : IMPORTANT! Caller MUST hold the read lock when calling this
543 : function. */
544 :
545 : void
546 : fd_blockstore_fini( fd_blockstore_t * blockstore );
547 :
548 : /* Accessors */
549 :
550 : /* fd_blockstore_wksp returns the local join to the wksp backing the
551 : blockstore. The lifetime of the returned pointer is at least as long
552 : as the lifetime of the local join. Assumes blockstore is a current
553 : local join. */
554 :
555 : FD_FN_PURE static inline fd_wksp_t *
556 0 : fd_blockstore_wksp( fd_blockstore_t * blockstore ) {
557 0 : return (fd_wksp_t *)( ( (ulong)blockstore->shmem ) - blockstore->shmem->blockstore_gaddr );
558 0 : }
559 :
560 : /* fd_blockstore_wksp_tag returns the workspace allocation tag used by
561 : the blockstore for its wksp allocations. Will be positive. Assumes
562 : blockstore is a current local join. */
563 :
564 : FD_FN_PURE static inline ulong
565 0 : fd_blockstore_wksp_tag( fd_blockstore_t const * blockstore ) {
566 0 : return blockstore->shmem->wksp_tag;
567 0 : }
568 :
569 : /* fd_blockstore_seed returns the hash seed used by the blockstore for various hash
570 : functions. Arbitrary value. Assumes blockstore is a current local join.
571 : TODO: consider renaming hash_seed? */
572 : FD_FN_PURE static inline ulong
573 0 : fd_blockstore_seed( fd_blockstore_t const * blockstore ) {
574 0 : return blockstore->shmem->seed;
575 0 : }
576 :
577 : /* fd_block_idx returns a pointer in the caller's address space to the
578 : fd_block_idx_t in the blockstore wksp. Assumes blockstore is local
579 : join. Lifetime of the returned pointer is that of the local join. */
580 :
581 : FD_FN_PURE static inline fd_block_idx_t *
582 0 : fd_blockstore_block_idx( fd_blockstore_t * blockstore ) {
583 0 : return fd_wksp_laddr_fast( fd_blockstore_wksp( blockstore ), blockstore->shmem->block_idx_gaddr );
584 0 : }
585 :
586 : /* fd_slot_deque returns a pointer in the caller's address space to the
587 : fd_slot_deque_t in the blockstore wksp. Assumes blockstore is local
588 : join. Lifetime of the returned pointer is that of the local join. */
589 :
590 : FD_FN_PURE static inline ulong *
591 0 : fd_blockstore_slot_deque( fd_blockstore_t * blockstore ) {
592 0 : return fd_wksp_laddr_fast( fd_blockstore_wksp( blockstore), blockstore->shmem->slot_deque_gaddr );
593 0 : }
594 :
595 : /* fd_blockstore_alloc returns a pointer in the caller's address space to
596 : the blockstore's allocator. */
597 :
598 : FD_FN_PURE static inline fd_alloc_t * /* Lifetime is that of the local join */
599 0 : fd_blockstore_alloc( fd_blockstore_t * blockstore ) {
600 0 : return fd_wksp_laddr_fast( fd_blockstore_wksp( blockstore), blockstore->shmem->alloc_gaddr );
601 0 : }
602 :
603 : /* fd_blockstore_shred_test returns 1 if a shred keyed by (slot, idx) is
604 : already in the blockstore and 0 otherwise. */
605 :
606 : int
607 : fd_blockstore_shred_test( fd_blockstore_t * blockstore, ulong slot, uint idx );
608 :
609 : /* fd_buf_shred_query_copy_data queries the blockstore for shred at
610 : slot, shred_idx. Copies the shred data to the given buffer and
611 : returns the data size. Returns -1 on failure.
612 :
613 : IMPORTANT! Caller MUST hold the read lock when calling this
614 : function. */
615 :
616 : long
617 : fd_buf_shred_query_copy_data( fd_blockstore_t * blockstore,
618 : ulong slot,
619 : uint shred_idx,
620 : void * buf,
621 : ulong buf_max );
622 :
623 : /* fd_blockstore_block_hash_query performs a blocking query (concurrent
624 : writers are not blocked) for the block hash of slot. Returns
625 : FD_BLOCKSTORE_SUCCESS on success and FD_BLOCKSTORE_ERR_KEY if slot is
626 : not in blockstore. Cannot fail. On success, a copy of the block
627 : hash will be populated in `block_hash`. Retains no interest in
628 : `slot` or `block_hash`.
629 :
630 : The block hash is the final poh hash for a slot and available on the
631 : last microblock header. */
632 :
633 : int
634 : fd_blockstore_block_hash_query( fd_blockstore_t * blockstore, ulong slot, fd_hash_t * block_hash );
635 :
636 : /* fd_blockstore_bank_hash_query performs a blocking query (concurrent
637 : writers are not blocked) for the bank hash of slot. Returns
638 : FD_BLOCKSTORE_SUCCESS on success and FD_BLOCKSTORE_ERR_KEY if slot is
639 : not in blockstore. Cannot fail. On success, a copy of the bank hash
640 : will be populated in `bank_hash`. Retains no interest in `slot` or
641 : `bank_hash`.
642 :
643 : The bank hash is a hash of the execution state (the "bank") after
644 : executing the block for a given slot. */
645 :
646 : int
647 : fd_blockstore_bank_hash_query( fd_blockstore_t * blockstore, ulong slot, fd_hash_t * bank_hash );
648 :
649 : /* fd_blockstore_block_map_query queries the blockstore for the block
650 : map entry at slot. Returns a pointer to the slot meta or NULL if not
651 : in blockstore.
652 :
653 : IMPORTANT! This should only be used for single-threaded / offline
654 : use-cases as it does not test the query. Read notes below for
655 : block_map usage in live. */
656 :
657 : fd_block_info_t *
658 : fd_blockstore_block_map_query( fd_blockstore_t * blockstore, ulong slot );
659 :
660 : /* IMPORTANT! NOTES FOR block_map USAGE:
661 :
662 : The block_info entries must be queried using the query_try/query_test
663 : pattern. This will frequently look like:
664 :
665 : int err = FD_MAP_ERR_AGAIN;
666 : loop while( err == FD_MAP_ERR_AGAIN )
667 : block_map_query_t query;
668 : err = fd_block_map_query_try( nonblocking );
669 : block_info_t * ele = fd_block_map_query_ele(query);
670 : if ERROR is FD_MAP_ERR_KEY, then the slot is not found.
671 : if ERROR is FD_MAP_ERR_AGAIN, then immediately continue.
672 : // important to handle ALL possible return err codes *before*
673 : // accessing the ele, as the ele will be the sentinel (usually NULL)
674 : speculatively execute <stuff>
675 : - no side effects
676 : - no early return
677 : err = fd_block_map_query_test(query)
678 : end loop
679 :
680 : Some accessors are provided to callers that already do this pattern,
681 : and handle the looping querying. For example, block_hash_copy, and
682 : parent_slot_query. However, for most caller use cases, it would be
   much more efficient to use the query_try/query_test pattern directly.
684 :
685 : Example: if you are accessing a block_info_t m, and m->parent_slot to
686 : the blockstore->shmem->smr, then you will need to start_write on the
687 : blockstore, query_try for the block_info_t object, set
688 : shmem->smr = meta->parent_slot, and then query_test, AND call
689 : blockstore_end_write. In the case that there's block_info contention,
690 : i.e. another thread is removing the block_info_t object of interest
691 : as we are trying to access it, the query_test will ERR_AGAIN, we will
692 : loop back and try again, hit the FD_MAP_ERR_KEY condition
693 : (and exit the loop gracefully), and we will have an incorrectly set
694 : shmem->smr.
695 :
696 : So depending on the complexity of what's being executed, it's easiest
697 : to directly copy what you need from the block_info_t into a variable
698 : outside the context of the loop, and use it further below, ex:
699 :
700 : ulong map_item = NULL_ITEM;
701 : loop {
702 : query_try
703 : map_item = ele->map_item; // like parent_slot
704 : query_test
705 : }
706 : check if map_item is NULL_ITEM
707 : fd_blockstore_start_write
708 : use map_item
709 : fd_blockstore_end_write
710 :
711 : Writes and updates (blocking). The pattern is:
712 : int err = fd_block_map_prepare( &slot, query, blocking );
713 : block_info_t * ele = fd_block_map_query_ele(query);
714 :
715 : IF slot was an existing key, then ele->slot == slot, and you are MODIFYING
716 : <modify ele>
717 : If slot was not an existing key, then ele->slot == 0, and you are INSERTING
718 : ele->slot = slot;
719 : <initialize ele>
720 :
721 : fd_block_map_publish(query); // will always succeed */
722 :
723 : /* fd_blockstore_parent_slot_query queries the parent slot of slot.
724 :
725 : This is non-blocking. */
726 : ulong
727 : fd_blockstore_parent_slot_query( fd_blockstore_t * blockstore, ulong slot );
728 :
729 : /* fd_blockstore_block_map_query_volatile is the same as above except it
730 : only copies out the metadata (fd_block_map_t). Returns
731 : FD_BLOCKSTORE_SLOT_MISSING if slot is missing, otherwise
732 : FD_BLOCKSTORE_SUCCESS. */
733 :
734 : int
735 : fd_blockstore_block_map_query_volatile( fd_blockstore_t * blockstore,
736 : int fd,
737 : ulong slot,
738 : fd_block_info_t * block_info_out ) ;
739 :
/* fd_blockstore_block_info_test tests if a block meta entry exists for
   the given slot.  Returns 1 if the entry exists and 0 otherwise.

   IMPORTANT! Caller MUST NOT be in a block_map_t prepare when calling
   this function (presumably because the test itself queries the block
   map -- see the concurrency notes above). */
int
fd_blockstore_block_info_test( fd_blockstore_t * blockstore, ulong slot );
747 :
/* fd_blockstore_block_info_remove removes the block meta entry for the
   given slot.  Returns FD_BLOCKSTORE_SUCCESS if an entry for slot
   existed (and was removed) and an error code otherwise.

   IMPORTANT! Caller MUST NOT be in a block_map_t prepare when calling
   this function. */
int
fd_blockstore_block_info_remove( fd_blockstore_t * blockstore, ulong slot );
756 :
/* fd_blockstore_slot_remove removes slot from blockstore, including all
   relevant internal structures (block meta entry, buffered shreds,
   etc.).

   IMPORTANT! Caller MUST NOT be in a block_map_t prepare when calling
   this function. */
void
fd_blockstore_slot_remove( fd_blockstore_t * blockstore, ulong slot );
764 :
765 : /* Operations */
766 :
/* fd_blockstore_shred_insert inserts shred into the blockstore, fast
   O(1).  Insertion can fail, e.g. if this shred is already in the
   blockstore or the blockstore is full; failures are not reported to
   the caller (the function returns void).

   NOTE(review): this comment previously claimed the function returns
   the slot's `consumed_idx` on success and FD_SHRED_IDX_NULL on error,
   but the declaration below returns void -- the stale return-value
   documentation has been removed.  Confirm against the implementation.

   fd_blockstore_shred_insert will manage locking, so the caller
   should NOT be acquiring the blockstore read/write lock before
   calling this function. */

void
fd_blockstore_shred_insert( fd_blockstore_t * blockstore, fd_shred_t const * shred );
779 :
/* fd_blockstore_shred_remove removes the buffered (not yet assembled)
   shred at index idx for the given slot.

   NOTE(review): this comment previously referred to the function as
   fd_blockstore_buffered_shreds_remove and claimed it removes all the
   unassembled shreds for a slot, but the declaration takes a single
   shred idx -- confirm the intended semantics in the .c file. */
void
fd_blockstore_shred_remove( fd_blockstore_t * blockstore, ulong slot, uint idx );
784 :
/* fd_blockstore_slice_query queries for the block slice beginning from
   shred `start_idx`, ending at `end_idx`, inclusive.  Validates
   start_idx and end_idx as valid batch boundaries.  Copies at most
   `max` bytes of the shred payloads into `buf`, and returns
   FD_BLOCKSTORE_NO_MEM if the buffer is too small.

   Returns FD_BLOCKSTORE_SUCCESS (0) on success and a FD_MAP_ERR
   (negative) on failure.  On success, `buf` will be populated with the
   copied slice and `buf_sz` will contain the number of bytes copied.
   Caller must ignore the values of `buf` and `buf_sz` on failure.

   Implementation is lockfree and safe with concurrent operations on
   blockstore. */

int
fd_blockstore_slice_query( fd_blockstore_t * blockstore,
                           ulong slot,
                           uint start_idx,
                           uint end_idx,
                           ulong max,
                           uchar * buf,
                           ulong * buf_sz );
807 :
/* fd_blockstore_shreds_complete returns nonzero if all shreds for slot
   have been received, 0 otherwise.  Intended as a replacement anywhere
   the code queries for an fd_block_t * only to test for existence
   without actually using the block data.  Semantically equivalent to
   query_block( slot ) != NULL.

   Implementation is lockfree and safe with concurrent operations on
   blockstore. */
int
fd_blockstore_shreds_complete( fd_blockstore_t * blockstore, ulong slot );
816 :
/* fd_blockstore_publish publishes all blocks up to the provided
   watermark `wmk` (presumably the caller's smr -- see review note
   below).  Publishing entails 1. pruning and 2. archiving.  Pruning
   removes any blocks that are not part of the same fork as the smr
   (hence the name pruning, like pruning the branches of a tree).
   Archiving removes from memory any slots < smr that are on the same
   fork, but writes those blocks out to disk using the provided file
   descriptor to the archival file `fd`.

   Note that slots < smr are ancestors of the smr, and are therefore
   finalized slots which is why they are archived.  Blocks removed as a
   result of pruning are not finalized, and therefore not archived.

   NOTE(review): this comment previously said publishing runs "until
   the current blockstore smr (blockstore->smr)" but never mentioned
   the explicit `wmk` parameter -- confirm the relationship between wmk
   and blockstore->smr against the implementation.

   IMPORTANT! Caller MUST hold the write lock when calling this
   function. */

void
fd_blockstore_publish( fd_blockstore_t * blockstore, int fd, ulong wmk );
834 :
/* fd_blockstore_log_block_status logs the status of block map entries
   near around_slot.  NOTE(review): the exact window of slots logged is
   not visible from this declaration -- see the .c file. */
void
fd_blockstore_log_block_status( fd_blockstore_t * blockstore, ulong around_slot );
837 :
/* fd_blockstore_log_mem_usage logs the memory usage of blockstore in a
   human-readable format.

   IMPORTANT! Caller MUST hold the read lock when calling this
   function. */

void
fd_blockstore_log_mem_usage( fd_blockstore_t * blockstore );
843 :
844 : FD_PROTOTYPES_END
845 :
/* BLOCK_ARCHIVING compile-time switch, default 0 (disabled).  May be
   overridden by defining it before including this header or on the
   compiler command line.  NOTE(review): presumably gates the block
   archiving path described in fd_blockstore_publish above -- confirm
   in the .c file. */
#ifndef BLOCK_ARCHIVING
#define BLOCK_ARCHIVING 0
#endif
849 :
850 : #endif /* HEADER_fd_src_flamenco_runtime_fd_blockstore_h */
|