LCOV - code coverage report
Current view: top level - flamenco/runtime - fd_blockstore.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 100 0.0 %
Date: 2025-08-01 05:13:22 Functions: 0 999 0.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_flamenco_runtime_fd_blockstore_h
       2             : #define HEADER_fd_src_flamenco_runtime_fd_blockstore_h
       3             : 
       4             : /* Blockstore is a high-performance database for in-memory indexing and
       5             :    durably storing blocks.
       6             : 
       7             :    `fd_blockstore` defines a number of useful types e.g. `fd_block_t`,
       8             :    `fd_block_shred`, etc.
       9             : 
      10             :    The blockstore alloc is used for allocating wksp resources for shred
      11             :    headers, microblock headers, and blocks.  This is an fd_alloc.
      12             :    Allocations from this allocator will be tagged with wksp_tag and
      13             :    operations on this allocator will use concurrency group 0. */
      14             : 
      15             : #include "../../ballet/block/fd_microblock.h"
      16             : #include "../../ballet/shred/fd_deshredder.h"
      17             : #include "../../ballet/shred/fd_shred.h"
      18             : #include "../fd_flamenco_base.h"
      19             : #include "../types/fd_types.h"
      20             : #include "fd_rwseq_lock.h"
      21             : #include "stdbool.h"
      22             : #include <fcntl.h>
      23             : 
      24             : /* FD_BLOCKSTORE_ALIGN specifies the alignment needed for blockstore.
      25             :    ALIGN is double x86 cache line to mitigate various kinds of false
      26             :    sharing (eg. ACLPF adjacent cache line prefetch). */
      27             : 
      28           0 : #define FD_BLOCKSTORE_ALIGN (128UL)
      29             : 
      30             : /* FD_BLOCKSTORE_MAGIC defines a magic number for verifying the memory
      31             :    of blockstore is not corrupted. */
      32             : 
      33           0 : #define FD_BLOCKSTORE_MAGIC (0xf17eda2ce7b10c00UL) /* firedancer bloc version 0 */
      34             : 
      35             : /* DO NOT MODIFY. */
      36             : // #define FD_BUF_SHRED_MAP_MAX (1UL << 24UL) /* 16 million shreds can be buffered */
      37             : 
      38             : /* TODO this can be removed if we explicitly manage a memory pool for
      39             :    the fd_block_map_t entries */
      40           0 : #define FD_BLOCKSTORE_CHILD_SLOT_MAX    (32UL)        /* the maximum # of children a slot can have */
      41           0 : #define FD_BLOCKSTORE_ARCHIVE_MIN_SIZE  (1UL << 26UL) /* 64MB := ceil(MAX_DATA_SHREDS_PER_SLOT*1228) */
      42             : 
      43             : /* FD_SLICE_ALIGN specifies the alignment needed for a block slice.
      44             :    ALIGN is double x86 cache line to mitigate various kinds of false
      45             :    sharing (eg. ACLPF adjacent cache line prefetch). */
      46             : 
      47             : #define FD_SLICE_ALIGN (128UL)
      48             : 
      49             : /* FD_SLICE_MAX specifies the maximum size of an entry batch. This is
      50             :    equivalent to the maximum size of a block (ie. a block with a single
      51             :    entry batch). */
      52             : 
      53           0 : #define FD_SLICE_MAX (FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT)
      54             : 
      55             : /* FD_SLICE_MAX_WITH_HEADERS specifies the maximum size of all of the
      56             :    shreds that can be in an entry batch. This is equivalent to the
      57             :    maximum total size of the shreds (headers plus payloads) that can
      58             :    be in a single slot. */
      59             : 
      60           0 : #define FD_SLICE_MAX_WITH_HEADERS (FD_SHRED_DATA_HEADER_MAX_PER_SLOT + FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT)
      61             : 
      62             : /* 64 ticks per slot, and then one min size transaction per microblock
      63             :    for all the remaining microblocks.
      64             :    This bound should be used along with the transaction parser and tick
      65             :    verifier to enforce the assumptions.
      66             :    This is NOT a standalone conservative bound against malicious
      67             :    validators.
      68             :    A tighter bound could probably be derived if necessary. */
      69             : 
      70           0 : #define FD_MICROBLOCK_MAX_PER_SLOT ((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT - 64UL*sizeof(fd_microblock_hdr_t)) / (sizeof(fd_microblock_hdr_t)+FD_TXN_MIN_SERIALIZED_SZ) + 64UL) /* 200,796 */
      71             : /* 64 ticks per slot, and a single gigantic microblock containing min
      72             :    size transactions. */
      73             : #define FD_TXN_MAX_PER_SLOT ((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT - 65UL*sizeof(fd_microblock_hdr_t)) / (FD_TXN_MIN_SERIALIZED_SZ)) /* 272,635 */
      74             : 
      75             : // TODO centralize these
      76             : // https://github.com/firedancer-io/solana/blob/v1.17.5/sdk/program/src/clock.rs#L34
      77             : #define FD_MS_PER_TICK 6
      78             : 
      79             : // https://github.com/firedancer-io/solana/blob/v1.17.5/core/src/repair/repair_service.rs#L55
      80             : #define FD_REPAIR_TIMEOUT (200 / FD_MS_PER_TICK)
      81             : 
      82           0 : #define FD_BLOCKSTORE_SUCCESS                  0 /* operation succeeded */
      83             : #define FD_BLOCKSTORE_SUCCESS_SLOT_COMPLETE    1 /* positive success code; fd_blockstore_strerror has no case for it */
      84           0 : #define FD_BLOCKSTORE_ERR_INVAL   (-1)
      85             : #define FD_BLOCKSTORE_ERR_AGAIN   (-2)
      86           0 : #define FD_BLOCKSTORE_ERR_CORRUPT (-3)
      87             : #define FD_BLOCKSTORE_ERR_EMPTY   (-4)
      88             : #define FD_BLOCKSTORE_ERR_FULL    (-5)
      89           0 : #define FD_BLOCKSTORE_ERR_KEY     (-6)
      90             : #define FD_BLOCKSTORE_ERR_SHRED_FULL      -1 /* no space left for shreds. NOTE(review): same value as FD_BLOCKSTORE_ERR_INVAL */
      91             : #define FD_BLOCKSTORE_ERR_SLOT_FULL       -2 /* no space left for slots. NOTE(review): same value as FD_BLOCKSTORE_ERR_AGAIN */
      92             : #define FD_BLOCKSTORE_ERR_SHRED_MISSING   -4 /* NOTE(review): same value as FD_BLOCKSTORE_ERR_EMPTY */
      93           0 : #define FD_BLOCKSTORE_ERR_SLOT_MISSING    -5 /* NOTE(review): same value as FD_BLOCKSTORE_ERR_FULL */
      94           0 : #define FD_BLOCKSTORE_ERR_SHRED_INVALID   -7 /* shred was invalid */
      95             : #define FD_BLOCKSTORE_ERR_DESHRED_INVALID -8 /* deshredded block was invalid */
      96           0 : #define FD_BLOCKSTORE_ERR_NO_MEM          -9 /* no mem */
      97             : #define FD_BLOCKSTORE_ERR_UNKNOWN         -99 /* NOTE(review): unlike ERR_INVAL..ERR_KEY, the codes ERR_SHRED_FULL..ERR_UNKNOWN expand to unparenthesized negative literals */
      98             : 
      99           0 : static inline char const * fd_blockstore_strerror( int err ) { /* Returns a static, human-readable description of a FD_BLOCKSTORE_* code. */
     100           0 :   switch( err ) {
     101           0 :   case FD_BLOCKSTORE_SUCCESS:     return "success";
     102           0 :   case FD_BLOCKSTORE_ERR_INVAL:   return "bad input"; /* also returned for FD_BLOCKSTORE_ERR_SHRED_FULL, which has the same value */
     103           0 :   case FD_BLOCKSTORE_ERR_AGAIN:   return "try again"; /* also returned for FD_BLOCKSTORE_ERR_SLOT_FULL (same value) */
     104           0 :   case FD_BLOCKSTORE_ERR_CORRUPT: return "corruption detected";
     105           0 :   case FD_BLOCKSTORE_ERR_EMPTY:   return "empty"; /* also returned for FD_BLOCKSTORE_ERR_SHRED_MISSING (same value) */
     106           0 :   case FD_BLOCKSTORE_ERR_FULL:    return "full"; /* also returned for FD_BLOCKSTORE_ERR_SLOT_MISSING (same value) */
     107           0 :   case FD_BLOCKSTORE_ERR_KEY:     return "key not found";
     108           0 :   default: break; /* no dedicated text: covers SUCCESS_SLOT_COMPLETE and the codes -7, -8, -9 and -99 */
     109           0 :   }
     110           0 :   return "unknown"; /* fallback for every code without a case above */
     111           0 : }
     112             : 
     113             : struct fd_shred_key { /* key of the buffered shred map: the pair (slot, idx) */
     114             :   ulong slot; /* slot number */
     115             :   uint  idx;  /* shred index */
     116             : };
     117             : typedef struct fd_shred_key fd_shred_key_t;
     118             : 
     119             : static const fd_shred_key_t     fd_shred_key_null = { 0 }; /* all-zero key: slot 0, idx 0 */
     120             : #define FD_SHRED_KEY_NULL       fd_shred_key_null
     121             : #define FD_SHRED_KEY_INVAL(key) (!((key).slot) & !((key).idx)) /* 1 iff slot and idx are both zero, ie. key equals FD_SHRED_KEY_NULL */
     122           0 : #define FD_SHRED_KEY_EQ(k0,k1)  (!(((k0).slot) ^ ((k1).slot))) & !(((k0).idx) ^ (((k1).idx))) /* 1 iff slot and idx both match. NOTE(review): the expansion has no outer parentheses, so wrap it at the use site */
     123           0 : #define FD_SHRED_KEY_HASH(key)  ((uint)(((key).slot)<<15UL) | (((key).idx))) /* slot shifted left by 15 bits and truncated to uint, OR-ed with idx; current max shred idx is 32K = 1 << 15 */
     124             : 
     125             : /* fd_buf_shred is a thin wrapper around fd_shred_t that facilitates
     126             :    buffering data shreds before all the shreds for a slot have been
     127             :    received. After all shreds are received, these buffered shreds are
     128             :    released back into memory pool and future queries for the shreds are
     129             :    offset into the block data directly.
     130             : 
     131             :    The blockstore is only aware of data shreds and all APIs involving
     132             :    shreds refers to data shreds.
     133             : 
     134             :    Shreds are buffered into a map as they are received:
     135             : 
     136             :    | 0 | 1 | 2 | x | x | 5 | x |
     137             :              ^           ^
     138             :              c           r
     139             : 
     140             :    c = "consumed" = contiguous window starting from index 0
     141             :    r = "received" = highest index received so far
     142             : 
     143             :    Shred memory layout while stored in the map:
     144             : 
     145             :    | shred hdr | shred payload |
     146             : */
     147             : struct __attribute__((aligned(128UL))) fd_buf_shred {
     148             :   fd_shred_key_t key;   /* (slot, idx) of this shred; key type of fd_buf_shred_map */
     149             :   ulong          prev;  /* presumably reserved for the pool / map templates instantiated on this type -- confirm */
     150             :   ulong          next;  /* presumably reserved for the pool / map templates instantiated on this type -- confirm */
     151             :   ulong          memo;  /* presumably the cached key hash used by the map template -- confirm */
     152             :   int            eqvoc; /* we've seen an equivocating version of this
     153             :                              shred (same key but different payload). */
     154             :   union {
     155             :     fd_shred_t hdr;                  /* shred header; overlays the first bytes of buf */
     156             :     uchar      buf[FD_SHRED_MIN_SZ]; /* the entire shred buffer, both header and payload. */
     157             :   };
     158             : };
     159             : typedef struct fd_buf_shred fd_buf_shred_t;
     160             : 
     161             : #define POOL_NAME  fd_buf_shred_pool /* instantiates the fd_pool_para.c template as fd_buf_shred_pool_* */
     162           0 : #define POOL_ELE_T fd_buf_shred_t /* pool elements are buffered shreds */
     163             : #include "../../util/tmpl/fd_pool_para.c"
     164             : 
     165             : #define MAP_NAME               fd_buf_shred_map /* instantiates the fd_map_chain_para.c template as fd_buf_shred_map_* */
     166           0 : #define MAP_ELE_T              fd_buf_shred_t
     167           0 : #define MAP_KEY_T              fd_shred_key_t
     168           0 : #define MAP_KEY_EQ(k0,k1)      (FD_SHRED_KEY_EQ(*k0,*k1)) /* outer parentheses are supplied here; FD_SHRED_KEY_EQ itself expands without them */
     169             : #define MAP_KEY_EQ_IS_SLOW     1
     170           0 : #define MAP_KEY_HASH(key,seed) (FD_SHRED_KEY_HASH(*key)^seed) /* key hash XOR-ed with the map seed */
     171             : #include "../../util/tmpl/fd_map_chain_para.c"
     172             : 
     173             : #define DEQUE_NAME fd_slot_deque /* instantiates the fd_deque_dynamic.c template as fd_slot_deque_* */
     174           0 : #define DEQUE_T    ulong /* elements are slot numbers */
     175             : #include "../../util/tmpl/fd_deque_dynamic.c"
     176             : 
     177             : /* fd_block_shred_t is a shred that has been assembled into a block. The
     178             :    shred begins at `off` relative to the start of the block's data
     179             :    region. */
     180             : struct fd_block_shred {
     181             :   fd_shred_t hdr; /* the data shred header (held by value, not a pointer) */
     182             :   ulong      off; /* offset to the payload relative to the start of the block's data region */
     183             : };
     184             : typedef struct fd_block_shred fd_block_shred_t;
     185             : 
     186             : /*
     187             :  * fd_block_entry_batch_t is a microblock/entry batch within a block.
     188             :  * The offset is relative to the start of the block's data region,
     189             :  * and indicates where the batch ends.  The (exclusive) end offset of
     190             :  * batch i is the (inclusive) start offset of batch i+1.  The 0th batch
     191             :  * always starts at offset 0.
     192             :  * On the wire, the presence of one of the COMPLETE flags in a data
     193             :  * shred marks the end of a batch.
     194             :  * In other words, batch ends are aligned with shred ends, and batch
     195             :  * starts are aligned with shred starts.  Usually a batch comprises
     196             :  * multiple shreds, and a block comprises multiple batches.
     197             :  * This information is useful because bincode deserialization needs to
     198             :  * be performed on a per-batch basis.  Precisely a single array of
     199             :  * microblocks/entries is expected to be deserialized from a batch.
     200             :  * Trailing bytes in each batch are ignored by default.
     201             :  */
     202             : struct fd_block_entry_batch {
     203             :   ulong end_off; /* exclusive end offset of this batch, relative to the start of the block's data region */
     204             : };
     205             : typedef struct fd_block_entry_batch fd_block_entry_batch_t;
     206             : 
     207             : /* fd_block_micro_t is a microblock ("entry" in Solana parlance) within
     208             :    a block. The microblock begins at `off` relative to the start of the
     209             :    block's data region. */
     210             : struct fd_block_micro {
     211             :   ulong off; /* offset of the microblock relative to the start of the block's data region */
     212             : };
     213             : typedef struct fd_block_micro fd_block_micro_t;
     214             : 
     215             : /* If the 0th bit is set, this indicates the block is preparing, which
     216             :    means it might be partially executed e.g. a subset of the microblocks
     217             :    have been executed.  It is not safe to remove, relocate, or modify
     218             :    the block in any way at this time.
     219             : 
     220             :    Callers holding a pointer to a block should always make sure to
     221             :    inspect this flag.
     222             : 
     223             :    Other flags mainly provide useful metadata for read-only callers, eg.
     224             :    RPC. */
     225             : 
     226           0 : #define FD_BLOCK_FLAG_RECEIVING 0 /* xxxxxxx1 still receiving shreds */
     227           0 : #define FD_BLOCK_FLAG_COMPLETED 1 /* xxxxxx1x received the block ie. all shreds (SLOT_COMPLETE) */
     228             : #define FD_BLOCK_FLAG_REPLAYING 2 /* xxxxx1xx replay in progress (DO NOT REMOVE) */
     229           0 : #define FD_BLOCK_FLAG_PROCESSED 3 /* xxxx1xxx successfully replayed the block */
     230           0 : #define FD_BLOCK_FLAG_EQVOCSAFE 4 /* xxx1xxxx 52% of cluster has voted on this (slot, bank hash) */
     231           0 : #define FD_BLOCK_FLAG_CONFIRMED 5 /* xx1xxxxx 2/3 of cluster has voted on this (slot, bank hash) */
     232           0 : #define FD_BLOCK_FLAG_FINALIZED 6 /* x1xxxxxx 2/3 of cluster has rooted this slot */
     233             : #define FD_BLOCK_FLAG_DEADBLOCK 7 /* 1xxxxxxx failed to replay the block */
     234             : 
     235             : /* Rewards assigned after block is executed */
     236             : 
     237             : struct fd_block_rewards {
     238             :   ulong collected_fees; /* presumably the total fees collected in the block -- confirm units against the writer */
     239             :   fd_hash_t leader;     /* presumably identifies the slot leader that receives the reward -- confirm */
     240             :   ulong post_balance;   /* presumably the leader balance after the reward is applied -- confirm */
     241             : };
     242             : typedef struct fd_block_rewards fd_block_rewards_t;
     243             : 
     244             : /* The FD_BLOCK_FLAG_* values are bit indices and together occupy all
     245             :    of bits [0, 8) of the flags byte, so no bits remain reserved.
     246             :    To avoid confusion, please use `fd_bits.h` API
     247             :    ie. `fd_uchar_set_bit`, `fd_uchar_extract_bit`. */
     248             : 
     249             : #define SET_NAME fd_block_set /* instantiates the fd_set.c template as fd_block_set_* */
     250             : #define SET_MAX  FD_SHRED_BLK_MAX /* set capacity passed to the template */
     251             : #include "../../util/tmpl/fd_set.c"
     252             : 
     253             : struct fd_block_info {
     254             :   ulong slot; /* map key */
     255             :   ulong next; /* reserved for use by fd_map_giant.c. NOTE(review): fd_block_map is instantiated from fd_map_slot_para.c -- confirm this field is still needed */
     256             : 
     257             :   /* Ancestry */
     258             : 
     259             :   ulong parent_slot;
     260             :   ulong child_slots[FD_BLOCKSTORE_CHILD_SLOT_MAX];
     261             :   ulong child_slot_cnt;
     262             : 
     263             :   /* Metadata */
     264             : 
     265             :   /* To be banished after offline ledger replay is removed. These fields
     266             :      are not used for replay. */
     267             :   ulong     block_height;
     268             :   fd_hash_t block_hash;
     269             :   fd_hash_t bank_hash;
     270             : 
     271             :   ulong     fec_cnt;        /* the number of FEC sets in the slot */
     272             :   uchar     flags;          /* bit field indexed by the FD_BLOCK_FLAG_* bit positions */
     273             :   long      ts;             /* the wallclock time when we finished receiving the block. */
     274             : 
     275             :   /* Windowing
     276             : 
     277             :      Shreds are buffered into a map as they are received:
     278             : 
     279             :      | 0 | 1 | 2 | x | x | 5 | x |
     280             :            ^   ^           ^
     281             :            c   b           r
     282             : 
     283             :      c = "consumed" = contiguous shred idxs that have been consumed.
     284             :                       the "consumer" is replay and the idx is
     285             :                       incremented after replaying each block slice.
     286             :      b = "buffered" = contiguous shred idxs that have been buffered.
     287             :                       when buffered == block_slice_end the next slice of
     288             :                       a block is ready for replay.
     289             :      r = "received" = highest shred idx received so far. used to detect
     290             :                       when repair is needed.
     291             :   */
     292             : 
     293             :   uint consumed_idx; /* the highest shred idx we've contiguously consumed (consecutive from 0). */
     294             :   uint buffered_idx; /* the highest shred idx we've contiguously buffered (consecutive from 0). */
     295             :   uint received_idx; /* the highest shred idx we've received (can be out-of-order). */
     296             : 
     297             :   uint data_complete_idx; /* the highest shred idx wrt contiguous entry batches (inclusive). */
     298             :   uint slot_complete_idx; /* the highest shred idx for the entire slot (inclusive). */
     299             : 
     300             :   /* This is a bit vec (fd_set) that tracks every shred idx marked with
     301             :      FD_SHRED_DATA_FLAG_DATA_COMPLETE. The bit position in the fd_set
     302             :      corresponds to the shred's index. Note shreds can be received
     303             :      out-of-order so higher bits might be set before lower bits. */
     304             : 
     305             :   fd_block_set_t data_complete_idxs[FD_SHRED_BLK_MAX / sizeof(ulong)]; /* NOTE(review): length is FD_SHRED_BLK_MAX / sizeof(ulong) elements -- confirm this matches the word count fd_set.c needs for SET_MAX bits */
     306             : 
     307             :   /* Helpers for batching tick verification */
     308             : 
     309             :   ulong ticks_consumed;
     310             :   ulong tick_hash_count_accum;
     311             :   fd_hash_t in_poh_hash; /* TODO: might not be best place to hold this */
     312             : 
     313             :   /* Block */
     314             : 
     315             :   ulong block_gaddr; /* global address to the start of the allocated fd_block_t */
     316             : };
     317             : typedef struct fd_block_info fd_block_info_t;
     318             : 
     319             : #define MAP_NAME                  fd_block_map /* instantiates the fd_map_slot_para.c template as fd_block_map_* */
     320           0 : #define MAP_ELE_T                 fd_block_info_t
     321           0 : #define MAP_KEY                   slot
     322           0 : #define MAP_ELE_IS_FREE(ctx, ele) ((ele)->slot == ULONG_MAX) /* an element is free when its slot is ULONG_MAX */
     323           0 : #define MAP_ELE_FREE(ctx, ele)    ((ele)->slot =  ULONG_MAX) /* freeing an element stores the ULONG_MAX sentinel in its slot */
     324           0 : #define MAP_ELE_MOVE(ctx,dst,src) do { MAP_ELE_T * _src = (src); (*(dst)) = *_src; _src->MAP_KEY = (MAP_KEY_T)ULONG_MAX; } while(0) /* copies src over dst, then marks src free */
     325           0 : #define MAP_KEY_HASH(key, seed)   (void)(seed), (*(key)) /* identity hash: the slot number itself; the seed is ignored */
     326             : #include "../../util/tmpl/fd_map_slot_para.c"
     327             : 
     328           0 : #define BLOCK_INFO_LOCK_CNT  1024UL /* cap on block map locks; fd_blockstore_footprint uses min( block_max, BLOCK_INFO_LOCK_CNT ) */
     329             : #define BLOCK_INFO_PROBE_CNT 2UL /* probe count passed to fd_block_map_footprint */
     330             : /*
     331             :    Rationale for block_map parameters:
     332             :     - each lock manages block_max / lock_cnt elements, so with block_max
     333             :       at 4096, each lock would manage 4 contiguous elements.
     334             :     - Since keys are unique and increment by 1, we can index key to map
     335             :       bucket by taking key % ele_max directly. This way in theory we
     336             :       have perfect hashing and never need to probe.
     337             :        - This breaks when we store more than 4096 contiguous slots,
     338             :          i.e.: slot 0 collides with slot 4096, but this is at heart an
     339             :          OOM issue.
     340             :     - Causes possible contention - consider if we execute n, but are
     341             :       storing shreds for n+1 -- these are managed by the same lock.
     342             :       Perhaps opportunity for optimization.
     343             : */
     344             : 
     345             : /* fd_block_idx is an in-memory index of finalized blocks that have been
     346             :    archived to disk.  It records the slot together with the byte offset
     347             :    relative to the start of the file. */
     348             : 
     349             : struct fd_block_idx {
     350             :   ulong     slot; /* map key (MAP_KEY of the fd_block_idx map) */
     351             :   ulong     next;
     352             :   uint      hash;
     353             :   ulong     off;  /* byte offset of the archived block relative to the start of the file */
     354             :   fd_hash_t block_hash;
     355             :   fd_hash_t bank_hash;
     356             : };
     357             : typedef struct fd_block_idx fd_block_idx_t;
     358             : 
     359             : #define MAP_NAME          fd_block_idx /* instantiates the fd_map_dynamic.c template as fd_block_idx_* */
     360           0 : #define MAP_T             fd_block_idx_t
     361           0 : #define MAP_KEY           slot
     362           0 : #define MAP_KEY_HASH(key) ((uint)(key)) /* the slot truncated to uint; finalized slots are guaranteed to be unique so perfect hashing */
     363           0 : #define MAP_KEY_INVAL(k)  (k == ULONG_MAX) /* ULONG_MAX is the invalid (sentinel) key */
     364             : #include "../../util/tmpl/fd_map_dynamic.c"
     365             : 
     366             : /* fd_blockstore_archiver outlines the format of metadata
     367             :    at the start of an archive file - needed so that archive
     368             :    files can be read back on initialization. */
     369             : 
     370             : struct fd_blockstore_archiver {
     371             :   ulong fd_size_max;      /* maximum size of the archival file */
     372             :   ulong num_blocks;       /* number of blocks in the archival file. needed for reading back */
     373             :   ulong head;             /* location of least recently written block */
     374             :   ulong tail;             /* location after most recently written block */
     375             : };
     376             : typedef struct fd_blockstore_archiver fd_blockstore_archiver_t;
     377           0 : #define FD_BLOCKSTORE_ARCHIVE_START sizeof(fd_blockstore_archiver_t) /* size of the metadata above; presumably the file offset at which block data starts -- confirm. NOTE(review): expansion is unparenthesized */
     378             : 
     379             : /*   CONCURRENCY NOTES FOR BLOCKSTORE ENJOINERS:
     380             : 
     381             :    With the parallelization of the shred map and block map, parts of the
     382             :    blockstore are concurrent, and parts are not. Block map and shred map
     383             :    have their own locks, which are managed through the
     384             :    query_try/query_test APIs. When accessing buf_shred_t and
     385             :    block_info_t items then, the caller does not need to use
     386             :    blockstore_start/end_read/write. However, the
     387             :    blockstore_start/end_read/write still protects the blockstore_shmem_t
     388             :    object. If you are reading and writing any blockstore_shmem fields
     389             :    and at the same time accessing the block_info_t or buf_shred_t, you
     390             :    should call both the blockstore_start/end_read/write APIs AND the map
     391             :    query_try/test APIs. These are locks of separate concerns and will
     392             :    not deadlock with each other. TODO update docs when we switch to
     393             :    fenced read/write for primitive fields in shmem_t. */
     394             : struct __attribute__((aligned(FD_BLOCKSTORE_ALIGN))) fd_blockstore_shmem {
     395             : 
     396             :   /* Metadata */
     397             : 
     398             :   ulong magic;            /* presumably FD_BLOCKSTORE_MAGIC once the region is formatted -- confirm in fd_blockstore_new */
     399             :   ulong blockstore_gaddr;
     400             :   ulong wksp_tag;         /* presumably the tag applied to allocations made through the blockstore alloc -- confirm */
     401             :   ulong seed;             /* presumably the hash seed handed to fd_blockstore_new -- confirm */
     402             : 
     403             :   /* Persistence */
     404             : 
     405             :   fd_blockstore_archiver_t archiver;
     406             :   ulong mrw_slot; /* most recently written slot */
     407             : 
     408             :   /* Slot metadata */
     409             : 
     410             :   ulong lps; /* latest processed slot */
     411             :   ulong hcs; /* highest confirmed slot */
     412             :   ulong wmk; /* watermark. DO NOT MODIFY DIRECTLY. */
     413             : 
     414             :   /* Config limits */
     415             : 
     416             :   ulong shred_max; /* maximum # of shreds that can be held in memory */
     417             :   ulong block_max; /* maximum # of blocks that can be held in memory */
     418             :   ulong idx_max;   /* maximum # of blocks that can be indexed from the archival file */
     419             :   ulong alloc_max; /* maximum bytes that can be allocated */
     420             : 
     421             :   //ulong block_map_gaddr;  /* map of slot->(slot_meta, block) */
     422             :   ulong block_idx_gaddr;  /* map of slot->byte offset in archival file */
     423             :   ulong slot_deque_gaddr; /* deque of slot numbers */
     424             : 
     425             :   ulong alloc_gaddr;      /* presumably the global address of the blockstore fd_alloc -- confirm */
     426             : };
     427             : typedef struct fd_blockstore_shmem fd_blockstore_shmem_t;
     428             : 
     429             : /* fd_blockstore_t is a local join to the blockstore.  This is specific
     430             :    to the local address space and should not be shared across tiles. */
     431             : 
     432             : struct fd_blockstore {
     433             : 
     434             :   /* shared memory region */
     435             : 
     436             :   fd_blockstore_shmem_t * shmem; /* reads/writes of shmem fields must be bracketed by the fd_blockstore_start/end_read/write APIs */
     437             : 
     438             :   /* local join handles */
     439             : 
     440             :   fd_buf_shred_pool_t shred_pool[1]; /* join to the buffered shred pool */
     441             :   fd_buf_shred_map_t  shred_map[1];  /* join to the buffered shred map (keyed by fd_shred_key_t) */
     442             :   fd_block_map_t      block_map[1];  /* join to the block info map (keyed by slot) */
     443             : };
     444             : typedef struct fd_blockstore fd_blockstore_t;
     445             : 
     446             : FD_PROTOTYPES_BEGIN
     447             : 
     448             : /* Construction API */
     449             : 
     450             : FD_FN_CONST static inline ulong
     451           0 : fd_blockstore_align( void ) { /* Returns the alignment required for a blockstore memory region. */
     452           0 :   return FD_BLOCKSTORE_ALIGN; /* 128 bytes */
     453           0 : }
     454             : 
     455             : /* fd_blockstore_footprint returns the footprint of the entire
     456             :    blockstore shared memory region occupied by `fd_blockstore_shmem_t`
     457             :    including data structures. */
     458             : 
     459             : FD_FN_CONST static inline ulong
     460           0 : fd_blockstore_footprint( ulong shred_max, ulong block_max, ulong idx_max ) {
     461             :   /* block_max is rounded up to a power of two before any sizing below.
                           TODO -- when removing, make change in fd_blockstore_new as well */
     462           0 :   block_max      = fd_ulong_pow2_up( block_max );
     463           0 :   ulong lock_cnt = fd_ulong_min( block_max, BLOCK_INFO_LOCK_CNT ); /* never more locks than block map elements */
     464             : 
     465           0 :   int lg_idx_max = fd_ulong_find_msb( fd_ulong_pow2_up( idx_max ) ); /* presumably log2 of idx_max rounded up to a power of two -- per the fd_bits helper names */
     466           0 :   return FD_LAYOUT_FINI(
     467           0 :     FD_LAYOUT_APPEND(
     468           0 :     FD_LAYOUT_APPEND(
     469           0 :     FD_LAYOUT_APPEND(
     470           0 :     FD_LAYOUT_APPEND(
     471           0 :     FD_LAYOUT_APPEND(
     472           0 :     FD_LAYOUT_APPEND(
     473           0 :     FD_LAYOUT_APPEND(
     474           0 :     FD_LAYOUT_APPEND(
     475           0 :     FD_LAYOUT_APPEND(
     476           0 :     FD_LAYOUT_INIT,
     477           0 :       alignof(fd_blockstore_shmem_t), sizeof(fd_blockstore_shmem_t) ), /* the fd_blockstore_shmem_t header */
     478           0 :       alignof(fd_buf_shred_t),        sizeof(fd_buf_shred_t) * shred_max ), /* array of shred_max fd_buf_shred_t */
     479           0 :       fd_buf_shred_pool_align(),      fd_buf_shred_pool_footprint() ), /* shred pool region */
     480           0 :       fd_buf_shred_map_align(),       fd_buf_shred_map_footprint( shred_max ) ), /* shred map region, sized from shred_max */
     481           0 :       alignof(fd_block_info_t),        sizeof(fd_block_info_t) * block_max ), /* array of block_max fd_block_info_t */
     482           0 :       fd_block_map_align(),           fd_block_map_footprint( block_max, lock_cnt, BLOCK_INFO_PROBE_CNT ) ), /* block map region */
     483           0 :       fd_block_idx_align(),           fd_block_idx_footprint( lg_idx_max ) ), /* archive index map region, sized from lg_idx_max */
     484           0 :       fd_slot_deque_align(),          fd_slot_deque_footprint( block_max ) ), /* slot deque region, sized from block_max */
     485           0 :       fd_alloc_align(),               fd_alloc_footprint() ), /* fd_alloc region */
     486           0 :     fd_blockstore_align() ); /* layout is finalized with the blockstore alignment */
     487           0 : }
     488             : 
     489             : /* fd_blockstore_new formats a memory region with the appropriate
     490             :    alignment and footprint into a blockstore.  shmem points in the
     491             :    caller's address space of the memory region to format.  Returns shmem
     492             :    on success (blockstore has ownership of the memory region) and NULL
     493             :    on failure (no changes, logs details).  Caller is not joined on
     494             :    return.  The blockstore will be empty and unlocked. */
     495             : 
     496             : void *
     497             : fd_blockstore_new( void * shmem,
     498             :                    ulong  wksp_tag,
     499             :                    ulong  seed,
     500             :                    ulong  shred_max,
     501             :                    ulong  block_max,
     502             :                    ulong  idx_max );
     503             : 
     504             : /* fd_blockstore_join joins a blockstore.  ljoin points to a
     505             :    fd_blockstore_t compatible memory region in the caller's address
     506             :    space used to hold info about the local join, shblockstore points in
     507             :    the caller's address space to the memory region containing the
     508             :    blockstore.  Returns a handle to the caller's local join on success
     509             :    (join has ownership of the ljoin region) and NULL on failure (no
     510             :    changes, logs details). */
     511             : 
     512             : fd_blockstore_t *
     513             : fd_blockstore_join( void * ljoin, void * shblockstore );
     514             : 
     515             : void *
     516             : fd_blockstore_leave( fd_blockstore_t * blockstore );
     517             : 
     518             : void *
     519             : fd_blockstore_delete( void * shblockstore );
     520             : 
     521             : /* fd_blockstore_init initializes a blockstore with the given `slot`.
     522             :    This slot is used for initializing fields (SMR, etc.), and should be
     523             :    the slot of the bank upon finishing a snapshot load if booting from
     524             :    a snapshot, genesis slot otherwise.  It is also used to "fake" the
     525             :    snapshot block as if that block's data were available.  The metadata
     526             :    for this block's slot will be populated (fd_block_map_t) but the
     527             :    actual block data (fd_block_t) won't exist. This is done to bootstrap
     528             :    the various components for live replay (turbine, repair, etc.)
     529             : 
     530             :    `fd` is a file descriptor for the blockstore archival file.  As part
     531             :    of `init`, blockstore rebuilds an in-memory index of the archival
     532             :    file.  */
     533             : 
     534             : fd_blockstore_t *
     535             : fd_blockstore_init( fd_blockstore_t *      blockstore,
     536             :                     int                    fd,
     537             :                     ulong                  fd_size_max,
     538             :                     ulong                  slot );
     539             : 
     540             : /* fd_blockstore_fini finalizes a blockstore.
     541             : 
     542             :    IMPORTANT!  Caller MUST hold the read lock when calling this
     543             :    function. */
     544             : 
     545             : void
     546             : fd_blockstore_fini( fd_blockstore_t * blockstore );
     547             : 
     548             : /* Accessors */
     549             : 
     550             : /* fd_blockstore_wksp returns the local join to the wksp backing the
     551             :    blockstore, computed as the local address of shmem minus its
     552             :    blockstore_gaddr.  The returned pointer lives at least as long as
     553             :    the local join.  Assumes blockstore is a current local join. */
     554             : 
     555             : FD_FN_PURE static inline fd_wksp_t *
     556           0 : fd_blockstore_wksp( fd_blockstore_t * blockstore ) {
     557           0 :   return (fd_wksp_t *)( ( (ulong)blockstore->shmem ) - blockstore->shmem->blockstore_gaddr );
     558           0 : }
     559             : 
     560             : /* fd_blockstore_wksp_tag returns the workspace allocation tag used by
     561             :    the blockstore for its wksp allocations (shmem->wksp_tag).  Will be
     562             :    positive.  Assumes blockstore is a current local join. */
     563             : 
     564             : FD_FN_PURE static inline ulong
     565           0 : fd_blockstore_wksp_tag( fd_blockstore_t const * blockstore ) {
     566           0 :   return blockstore->shmem->wksp_tag;
     567           0 : }
     568             : 
     569             : /* fd_blockstore_seed returns the hash seed (shmem->seed) used by the
     570             :    blockstore for various hash functions.  Arbitrary value.  Assumes
     571             :    blockstore is a current local join.  TODO: consider renaming hash_seed? */
     572             : FD_FN_PURE static inline ulong
     573           0 : fd_blockstore_seed( fd_blockstore_t const * blockstore ) {
     574           0 :   return blockstore->shmem->seed;
     575           0 : }
     576             : 
     577             : /* fd_blockstore_block_idx returns a pointer in the caller's address
     578             :    space to the fd_block_idx_t in the blockstore wksp.  Assumes
     579             :    blockstore is a local join.  Lifetime is that of the local join. */
     580             : 
     581             : FD_FN_PURE static inline fd_block_idx_t *
     582           0 : fd_blockstore_block_idx( fd_blockstore_t * blockstore ) {
     583           0 :   return fd_wksp_laddr_fast( fd_blockstore_wksp( blockstore ), blockstore->shmem->block_idx_gaddr );
     584           0 : }
     585             : 
     586             : /* fd_blockstore_slot_deque returns a pointer in the caller's address
     587             :    space to the slot deque (returned as ulong *) in the blockstore wksp.
     588             :    Assumes blockstore is a local join.  Lifetime is that of the local join. */
     589             : 
     590             : FD_FN_PURE static inline ulong *
     591           0 : fd_blockstore_slot_deque( fd_blockstore_t * blockstore ) {
     592           0 :   return fd_wksp_laddr_fast( fd_blockstore_wksp( blockstore), blockstore->shmem->slot_deque_gaddr );
     593           0 : }
     594             : 
     595             : /* fd_blockstore_alloc returns a pointer in the caller's address space
     596             :    to the blockstore's allocator.  Assumes blockstore is a local join. */
     597             : 
     598             : FD_FN_PURE static inline fd_alloc_t * /* Lifetime is that of the local join */
     599           0 : fd_blockstore_alloc( fd_blockstore_t * blockstore ) {
     600           0 :   return fd_wksp_laddr_fast( fd_blockstore_wksp( blockstore), blockstore->shmem->alloc_gaddr );
     601           0 : }
     602             : 
     603             : /* fd_blockstore_shred_test returns 1 if a shred keyed by (slot, idx) is
     604             :    already in the blockstore and 0 otherwise.  */
     605             : 
     606             : int
     607             : fd_blockstore_shred_test( fd_blockstore_t * blockstore, ulong slot, uint idx );
     608             : 
     609             : /* fd_buf_shred_query_copy_data queries the blockstore for shred at
     610             :    slot, shred_idx. Copies the shred data to the given buffer and
     611             :    returns the data size. Returns -1 on failure.
     612             : 
     613             :    IMPORTANT!  Caller MUST hold the read lock when calling this
     614             :    function. */
     615             : 
     616             : long
     617             : fd_buf_shred_query_copy_data( fd_blockstore_t * blockstore,
     618             :                               ulong             slot,
     619             :                               uint              shred_idx,
     620             :                               void *            buf,
     621             :                               ulong             buf_max );
     622             : 
     623             : /* fd_blockstore_block_hash_query performs a blocking query (concurrent
     624             :    writers are not blocked) for the block hash of slot.  Returns
     625             :    FD_BLOCKSTORE_SUCCESS on success and FD_BLOCKSTORE_ERR_KEY if slot is
     626             :    not in blockstore.  Cannot fail.  On success, a copy of the block
     627             :    hash will be populated in `block_hash`.  Retains no interest in
     628             :    `slot` or `block_hash`.
     629             : 
     630             :    The block hash is the final poh hash for a slot and available on the
     631             :    last microblock header. */
     632             : 
     633             : int
     634             : fd_blockstore_block_hash_query( fd_blockstore_t * blockstore, ulong slot, fd_hash_t * block_hash );
     635             : 
     636             : /* fd_blockstore_bank_hash_query performs a blocking query (concurrent
     637             :    writers are not blocked) for the bank hash of slot.  Returns
     638             :    FD_BLOCKSTORE_SUCCESS on success and FD_BLOCKSTORE_ERR_KEY if slot is
     639             :    not in blockstore.  Cannot fail.  On success, a copy of the bank hash
     640             :    will be populated in `bank_hash`.  Retains no interest in `slot` or
     641             :    `bank_hash`.
     642             : 
     643             :    The bank hash is a hash of the execution state (the "bank") after
     644             :    executing the block for a given slot. */
     645             : 
     646             : int
     647             : fd_blockstore_bank_hash_query( fd_blockstore_t * blockstore, ulong slot, fd_hash_t * bank_hash );
     648             : 
     649             : /* fd_blockstore_block_map_query queries the blockstore for the block
     650             :    map entry at slot.  Returns a pointer to the slot meta or NULL if not
     651             :    in blockstore.
     652             : 
     653             :    IMPORTANT! This should only be used for single-threaded / offline
     654             :    use-cases as it does not test the query. Read notes below for
     655             :    block_map usage in live. */
     656             : 
     657             : fd_block_info_t *
     658             : fd_blockstore_block_map_query( fd_blockstore_t * blockstore, ulong slot );
     659             : 
     660             : /* IMPORTANT! NOTES FOR block_map USAGE:
     661             : 
     662             :    The block_info entries must be queried using the query_try/query_test
     663             :    pattern. This will frequently look like:
     664             : 
     665             :    int err = FD_MAP_ERR_AGAIN;
     666             :    loop while( err == FD_MAP_ERR_AGAIN )
     667             :       block_map_query_t query;
     668             :       err = fd_block_map_query_try( nonblocking );
     669             :       block_info_t * ele = fd_block_map_query_ele(query);
     670             :       if ERROR is FD_MAP_ERR_KEY, then the slot is not found.
     671             :       if ERROR is FD_MAP_ERR_AGAIN, then immediately continue.
     672             :          // important to handle ALL possible return err codes *before*
     673             :          // accessing the ele, as the ele will be the sentinel (usually NULL)
     674             :       speculatively execute <stuff>
     675             :          - no side effects
     676             :          - no early return
     677             :       err = fd_block_map_query_test(query)
     678             :    end loop
     679             : 
     680             :    Some accessors are provided to callers that already do this pattern,
     681             :    and handle the looping querying. For example, block_hash_copy, and
     682             :    parent_slot_query. However, for most caller use cases, it would be
     683             :    much more efficient to use the query_try/query_test pattern directly.
     684             : 
     685             :    Example: if you are accessing a block_info_t m and assigning m->parent_slot
     686             :    to blockstore->shmem->smr, then you will need to start_write on the
     687             :    blockstore, query_try for the block_info_t object, set
     688             :    shmem->smr = m->parent_slot, and then query_test, AND call
     689             :    blockstore_end_write. In the case that there's block_info contention,
     690             :    i.e. another thread is removing the block_info_t object of interest
     691             :    as we are trying to access it, the query_test will ERR_AGAIN, we will
     692             :    loop back and try again, hit the FD_MAP_ERR_KEY condition
     693             :    (and exit the loop gracefully), and we will have an incorrectly set
     694             :    shmem->smr.
     695             : 
     696             :    So depending on the complexity of what's being executed, it's easiest
     697             :    to directly copy what you need from the block_info_t into a variable
     698             :    outside the context of the loop, and use it further below, ex:
     699             : 
     700             :    ulong map_item = NULL_ITEM;
     701             :    loop {
     702             :      query_try
     703             :      map_item = ele->map_item; // like parent_slot
     704             :      query_test
     705             :    }
     706             :    check if map_item is NULL_ITEM
     707             :    fd_blockstore_start_write
     708             :    use map_item
     709             :    fd_blockstore_end_write
     710             : 
     711             :    Writes and updates (blocking). The pattern is:
     712             :    int err = fd_block_map_prepare( &slot, query, blocking );
     713             :    block_info_t * ele = fd_block_map_query_ele(query);
     714             : 
     715             :    If slot was an existing key, then ele->slot == slot, and you are MODIFYING
     716             :       <modify ele>
     717             :    If slot was not an existing key, then ele->slot == 0, and you are INSERTING
     718             :       ele->slot = slot;
     719             :       <initialize ele>
     720             : 
     721             :    fd_block_map_publish(query); // will always succeed */
     722             : 
     723             : /* fd_blockstore_parent_slot_query queries the parent slot of slot.
     724             : 
     725             :    This is non-blocking. */
     726             : ulong
     727             : fd_blockstore_parent_slot_query( fd_blockstore_t * blockstore, ulong slot );
     728             : 
     729             : /* fd_blockstore_block_map_query_volatile queries the block map entry
     730             :    for slot and copies out only the metadata (fd_block_info_t) into
     731             :    block_info_out.  Returns FD_BLOCKSTORE_SLOT_MISSING if slot is
     732             :    missing, otherwise FD_BLOCKSTORE_SUCCESS. */
     733             : 
     734             : int
     735             : fd_blockstore_block_map_query_volatile( fd_blockstore_t * blockstore,
     736             :                                         int               fd,
     737             :                                         ulong             slot,
     738             :                                         fd_block_info_t * block_info_out ) ;
     739             : 
     740             : /* fd_blockstore_block_info_test tests if a block meta entry exists for
     741             :    the given slot.  Returns 1 if the entry exists and 0 otherwise.
     742             : 
     743             :    IMPORTANT!  Caller MUST NOT be in a block_map_t prepare when calling
     744             :    this function. */
     745             : int
     746             : fd_blockstore_block_info_test( fd_blockstore_t * blockstore, ulong slot );
     747             : 
     748             : /* fd_blockstore_block_info_remove removes a block meta entry for
     749             :    the given slot.  Returns SUCCESS if the entry exists and an
     750             :    error code otherwise.
     751             : 
     752             :    IMPORTANT!  Caller MUST NOT be in a block_map_t prepare when calling
     753             :    this function. */
     754             : int
     755             : fd_blockstore_block_info_remove( fd_blockstore_t * blockstore, ulong slot );
     756             : 
     757             : /* fd_blockstore_slot_remove removes slot from blockstore, including all
     758             :    relevant internal structures.
     759             : 
     760             :    IMPORTANT! Caller MUST NOT be in a block_map_t prepare when calling
     761             :    this function. */
     762             : void
     763             : fd_blockstore_slot_remove( fd_blockstore_t * blockstore, ulong slot );
     764             : 
     765             : /* Operations */
     766             : 
     767             : /* fd_blockstore_shred_insert inserts shred into the blockstore, fast
     768             :    O(1).  The function returns void, so the outcome of the insert is
     769             :    not reported to the caller through a return value.  Reasons an
     770             :    insert can fail include this shred is already in the blockstore or
     771             :    the blockstore is full.
     772             : 
     773             :    fd_blockstore_shred_insert will manage locking, so the caller
     774             :    should NOT be acquiring the blockstore read/write lock before
     775             :    calling this function. */
     776             : 
     777             : void
     778             : fd_blockstore_shred_insert( fd_blockstore_t * blockstore, fd_shred_t const * shred );
     779             : 
     780             : /* fd_blockstore_shred_remove removes the buffered (unassembled) shred
     781             :    keyed by (slot, idx) from the blockstore. */
     782             : void
     783             : fd_blockstore_shred_remove( fd_blockstore_t * blockstore, ulong slot, uint idx );
     784             : 
     785             : /* fd_blockstore_slice_query queries for the block slice beginning from
     786             :    shred `start_idx`, ending at `end_idx`, inclusive. Validates start
     787             :    and end_idx as valid batch boundaries. Copies at most `max` bytes of
     788             :    the shred payloads, and returns FD_BLOCKSTORE_NO_MEM if the buffer is
     789             :    too small.
     790             : 
     791             :    Returns FD_BLOCKSTORE_SUCCESS (0) on success and a FD_MAP_ERR
     792             :    (negative) on failure.  On success, `buf` will be populated with the
     793             :    copied slice and `buf_sz` will contain the number of bytes copied.
     794             :    Caller must ignore the values of `buf` and `buf_sz` on failure.
     795             : 
     796             :    Implementation is lockfree and safe with concurrent operations on
     797             :    blockstore. */
     798             : 
     799             : int
     800             : fd_blockstore_slice_query( fd_blockstore_t * blockstore,
     801             :                            ulong             slot,
     802             :                            uint              start_idx,
     803             :                            uint              end_idx,
     804             :                            ulong             max,
     805             :                            uchar *           buf,
     806             :                            ulong *           buf_sz );
     807             : 
     808             : /* fd_blockstore_shreds_complete should be a replacement for anywhere that is
     809             :    querying for an fd_block_t * for existence but not actually using the block data.
     810             :    Semantically equivalent to query_block( slot ) != NULL.
     811             : 
     812             :    Implementation is lockfree and safe with concurrent operations on
     813             :    blockstore. */
     814             : int
     815             : fd_blockstore_shreds_complete( fd_blockstore_t * blockstore, ulong slot );
     816             : 
     817             : /* fd_blockstore_publish publishes all blocks until the current
     818             :    blockstore smr (`blockstore->smr`).  Publishing entails 1. pruning
     819             :    and 2. archiving.  Pruning removes any blocks that are not part of
     820             :    the same fork as the smr (hence the name pruning, like pruning the
     821             :    branches of a tree).  Archiving removes from memory any slots < smr
     822             :    that are on the same fork, but writes those blocks out to disk using
     823             :    the provided file descriptor to the archival file `fd`.
     824             : 
     825             :    Note that slots < smr are ancestors of the smr, and are therefore
     826             :    finalized slots which is why they are archived.  Blocks removed as a
     827             :    result of pruning are not finalized, and therefore not archived.
     828             : 
     829             :    IMPORTANT!  Caller MUST hold the write lock when calling this
     830             :    function. */
     831             : 
     832             : void
     833             : fd_blockstore_publish( fd_blockstore_t * blockstore, int fd, ulong wmk );
     834             : 
     835             : void
     836             : fd_blockstore_log_block_status( fd_blockstore_t * blockstore, ulong around_slot );
     837             : 
     838             : /* fd_blockstore_log_mem_usage logs the memory usage of blockstore in a
     839             :    human-readable format.  Caller MUST hold the read lock. */
     840             : 
     841             : void
     842             : fd_blockstore_log_mem_usage( fd_blockstore_t * blockstore );
     843             : 
     844             : FD_PROTOTYPES_END
     845             : 
     846             : #ifndef BLOCK_ARCHIVING
     847             : #define BLOCK_ARCHIVING 0
     848             : #endif
     849             : 
     850             : #endif /* HEADER_fd_src_flamenco_runtime_fd_blockstore_h */

Generated by: LCOV version 1.14