LCOV - code coverage report
Current view: top level - disco/topo - fd_topo.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 22 107 20.6 %
Date: 2025-08-05 05:04:49 Functions: 4 1470 0.3 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_disco_topo_fd_topo_h
       2             : #define HEADER_fd_src_disco_topo_fd_topo_h
       3             : 
       4             : #include "../stem/fd_stem.h"
       5             : #include "../../tango/fd_tango.h"
       6             : #include "../../waltz/xdp/fd_xdp1.h"
       7             : #include "../../ballet/base58/fd_base58.h"
       8             : #include "../../util/net/fd_net_headers.h"
       9             : 
      10             : /* Maximum number of workspaces that may be present in a topology. */
      11             : #define FD_TOPO_MAX_WKSPS         (256UL)
      12             : /* Maximum number of links that may be present in a topology. */
      13             : #define FD_TOPO_MAX_LINKS         (256UL)
      14             : /* Maximum number of tiles that may be present in a topology. */
      15             : #define FD_TOPO_MAX_TILES         (256UL)
      16             : /* Maximum number of objects that may be present in a topology. */
      17             : #define FD_TOPO_MAX_OBJS          (4096UL)
      18             : /* Maximum number of links that may go into any one tile in the
      19             :    topology. */
      20             : #define FD_TOPO_MAX_TILE_IN_LINKS  ( 128UL)
      21             : /* Maximum number of links that a tile may write to. */
      22             : #define FD_TOPO_MAX_TILE_OUT_LINKS ( 32UL)
      23             : /* Maximum number of objects that a tile can use. */
      24             : #define FD_TOPO_MAX_TILE_OBJS      ( 256UL)
      25             : 
      26             : /* Maximum number of additional ip addresses */
      27             : #define FD_NET_MAX_SRC_ADDR 4
      28             : 
      29             : /* Maximum number of additional destinations for leader shreds and for retransmitted shreds */
      30             : #define FD_TOPO_ADTL_DESTS_MAX ( 32UL)
      31             : 
      32             : 
      33             : /* A workspace is a Firedancer specific memory management structure that
      34             :    sits on top of 1 or more memory mapped gigantic or huge pages mounted
      35             :    to the hugetlbfs. */
      36             : typedef struct {
      37             :   ulong id;           /* The ID of this workspace.  Indexed from [0, wksp_cnt).  When placed in a topology, the ID must be the index of the workspace in the workspaces list. */
      38             :   char  name[ 13UL ]; /* The name of this workspace, like "pack".  There can be at most one of each workspace name in a topology. */
      39             : 
      40             :   ulong numa_idx;     /* The index of the NUMA node on the system that this workspace should be allocated from. */
      41             : 
      42             :   int   is_locked;    /* If the workspace should use pages locked and pinned to a specific numa node. */
      43             : 
      44             :   /* Computed fields.  These are not supplied as configuration but calculated as needed. */
      45             :   struct {
      46             :     ulong page_sz;  /* The size of the pages that this workspace is backed by.  One of FD_PAGE_SIZE_*. */
      47             :     ulong page_cnt; /* The number of pages that must be mapped to this workspace to store all the data needed by consumers. */
      48             :     ulong part_max; /* The maximum number of partitions in the underlying workspace.  There can only be this many allocations made at any one time. */
      49             : 
      50             :     fd_wksp_t * wksp;            /* The workspace memory in the local process. */
      51             :     ulong       known_footprint; /* Total size in bytes of all data in Firedancer that will be stored in this workspace at startup. */
      52             :     ulong       total_footprint; /* Total size in bytes of all data in Firedancer that could be stored in this workspace, includes known data and loose data. */
      53             :   };
      54             : } fd_topo_wksp_t;
      55             : 
      56             : /* A link is an mcache in a workspace that has one producer and one or
      57             :    more consumers. A link may optionally also have a dcache, that holds
      58             :    fragments referred to by the mcache entries.
      59             : 
      60             :    A link belongs to exactly one workspace.  A link has exactly one
      61             :    producer, and 1 or more consumers.  Each consumer is either reliable
      62             :    or not reliable.  A link has a depth and a MTU, which correspond to
      63             :    the depth and MTU of the mcache and dcache respectively.  A MTU of
      64             :    zero means no dcache is needed, as there is no data. */
      65             : typedef struct {
      66             :   ulong id;           /* The ID of this link.  Indexed from [0, link_cnt).  When placed in a topology, the ID must be the index of the link in the links list. */
      67             :   char  name[ 13UL ]; /* The name of this link, like "pack_bank". There can be multiple of each link name in a topology. */
      68             :   ulong kind_id;      /* The ID of this link within its name.  If there are N links of a particular name, they have IDs [0, N).  The pair (name, kind_id) uniquely identifies a link, as does "id" on its own. */
      69             : 
      70             :   ulong depth;    /* The depth of the mcache representing the link. */
      71             :   ulong mtu;      /* The MTU of data fragments in the mcache.  A value of 0 means there is no dcache. */
      72             :   ulong burst;    /* The maximum number of MTU-sized data fragments that might be burst into the dcache. */
      73             : 
      74             :   ulong mcache_obj_id;
      75             :   ulong dcache_obj_id;
      76             : 
      77             :   /* Computed fields.  These are not supplied as configuration but calculated as needed. */
      78             :   struct {
      79             :     fd_frag_meta_t * mcache; /* The mcache of this link. */
      80             :     void *           dcache; /* The dcache of this link, if it has one. */
      81             :   };
      82             : 
      83             :   uint permit_no_consumers : 1;  /* Permit a topology where this link has no consumers */
      84             :   uint permit_no_producers : 1;  /* Permit a topology where this link has no producers */
      85             : } fd_topo_link_t;
      86             : 
      87             : /* Be careful: ip and port are in different byte orders */
      88             : typedef struct {
      89             :   uint   ip;   /* in network byte order */
      90             :   ushort port; /* in host byte order */
      91             : } fd_topo_ip_port_t;
      92             : 
      93             : struct fd_topo_net_tile {
      94             :   ulong umem_dcache_obj_id;  /* dcache for XDP UMEM frames */
      95             :   uint  bind_address;
      96             : 
      97             :   ushort shred_listen_port;
      98             :   ushort quic_transaction_listen_port;
      99             :   ushort legacy_transaction_listen_port;
     100             :   ushort gossip_listen_port;
     101             :   ushort repair_intake_listen_port;
     102             :   ushort repair_serve_listen_port;
     103             :   ushort send_src_port;
     104             : };
     105             : typedef struct fd_topo_net_tile fd_topo_net_tile_t;
     106             : 
     107             : /* A tile is a unique process that is spawned by Firedancer to represent
     108             :    one thread of execution.  Firedancer sandboxes all tiles to their own
     109             :    process for security reasons.
     110             : 
     111             :    A tile belongs to exactly one workspace.  A tile is a consumer of 0
     112             :    or more links, its inputs.  A tile is a producer of 0 or more output
     113             :    links.
     114             : 
     115             :    All input links will be automatically polled by the tile
     116             :    infrastructure, and output links will automatically source and manage
     117             :    credits from consumers. */
     118             : struct fd_topo_tile {
     119             :   ulong id;                     /* The ID of this tile.  Indexed from [0, tile_cnt).  When placed in a topology, the ID must be the index of the tile in the tiles list. */
     120             :   char  name[ 7UL ];            /* The name of this tile.  There can be multiple of each tile name in a topology. */
     121             :   ulong kind_id;                /* The ID of this tile within its name.  If there are N tiles of a particular name, they have IDs [0, N).  The pair (name, kind_id) uniquely identifies a tile, as does "id" on its own. */
     122             :   int   is_agave;               /* If the tile needs to run in the Agave (Anza) address space or not. */
     123             :   int   allow_shutdown;         /* If the tile is allowed to shutdown gracefully.  If false, when the tile exits it will tear down the entire application. */
     124             : 
     125             :   ulong cpu_idx;                /* The CPU index to pin the tile on.  A value of ULONG_MAX or more indicates the tile should be floating and not pinned to a core. */
     126             : 
     127             :   ulong in_cnt;                 /* The number of links that this tile reads from. */
     128             :   ulong in_link_id[ FD_TOPO_MAX_TILE_IN_LINKS ];       /* The link_id of each link that this tile reads from, indexed in [0, in_cnt). */
     129             :   int   in_link_reliable[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* If each link that this tile reads from is a reliable or unreliable consumer, indexed in [0, in_cnt). */
     130             :   int   in_link_poll[ FD_TOPO_MAX_TILE_IN_LINKS ];     /* If each link that this tile reads from should be polled by the tile infrastructure, indexed in [0, in_cnt).
     131             :                                                           If the link is not polled, the tile will not receive frags for it and the tile writer is responsible for
     132             :                                                           reading from the link.  The link must be marked as unreliable as it is not flow controlled. */
     133             : 
     134             :   ulong out_cnt;                                   /* The number of links that this tile writes to. */
     135             :   ulong out_link_id[ FD_TOPO_MAX_TILE_OUT_LINKS ]; /* The link_id of each link that this tile writes to, indexed in [0, out_cnt). */
     136             : 
     137             :   ulong tile_obj_id;
     138             :   ulong metrics_obj_id;
     139             :   ulong keyswitch_obj_id;
     140             :   ulong in_link_fseq_obj_id[ FD_TOPO_MAX_TILE_IN_LINKS ];
     141             : 
     142             :   ulong uses_obj_cnt;
     143             :   ulong uses_obj_id[ FD_TOPO_MAX_TILE_OBJS ];
     144             :   int   uses_obj_mode[ FD_TOPO_MAX_TILE_OBJS ];
     145             : 
     146             :   /* Computed fields.  These are not supplied as configuration but calculated as needed. */
     147             :   struct {
     148             :     ulong *    metrics; /* The shared memory for metrics that this tile should write.  Consumed by monitoring and metrics writing tiles. */
     149             : 
     150             :     /* The fseq of each link that this tile reads from.  Multiple fseqs
     151             :        may point to the link, if there are multiple consumers.  An fseq
     152             :        can be uniquely identified via (link_id, tile_id), or (link_kind,
     153             :        link_kind_id, tile_kind, tile_kind_id) */
     154             :     ulong *    in_link_fseq[ FD_TOPO_MAX_TILE_IN_LINKS ];
     155             :   };
     156             : 
     157             :   /* Configuration fields.  These are required to be known by the topology so it can determine the
     158             :      total size of Firedancer in memory. */
     159             :   union {
     160             :     fd_topo_net_tile_t net;
     161             : 
     162             :     struct {
     163             :       fd_topo_net_tile_t net;
     164             :       char interface[ 16 ];
     165             : 
     166             :       /* xdp specific options */
     167             :       ulong  xdp_rx_queue_size;
     168             :       ulong  xdp_tx_queue_size;
     169             :       ulong  free_ring_depth;
     170             :       long   tx_flush_timeout_ns;
     171             :       char   xdp_mode[8];
     172             :       int    zero_copy;
     173             : 
     174             :       ulong netdev_dbl_buf_obj_id; /* dbl_buf containing netdev_tbl */
     175             :       ulong fib4_main_obj_id;      /* fib4 containing main route table */
     176             :       ulong fib4_local_obj_id;     /* fib4 containing local route table */
     177             :       ulong neigh4_obj_id;         /* neigh4 hash map header */
     178             :       ulong neigh4_ele_obj_id;     /* neigh4 hash map slots */
     179             :     } xdp;
     180             : 
     181             :     struct {
     182             :       fd_topo_net_tile_t net;
     183             :       /* sock specific options */
     184             :       int so_sndbuf;
     185             :       int so_rcvbuf;
     186             :     } sock;
     187             : 
     188             :     struct {
     189             :       ulong netdev_dbl_buf_obj_id; /* dbl_buf containing netdev_tbl */
     190             :       ulong fib4_main_obj_id;      /* fib4 containing main route table */
     191             :       ulong fib4_local_obj_id;     /* fib4 containing local route table */
     192             :       char  neigh_if[ 16 ];        /* neigh4 interface name */
     193             :       ulong neigh4_obj_id;         /* neigh4 hash map header */
     194             :       ulong neigh4_ele_obj_id;     /* neigh4 hash map slots */
     195             :     } netlink;
     196             : 
     197             :     struct {
     198             :       uint   out_depth;
     199             :       uint   reasm_cnt;
     200             :       ulong  max_concurrent_connections;
     201             :       ulong  max_concurrent_handshakes;
     202             :       ushort quic_transaction_listen_port;
     203             :       ulong  idle_timeout_millis;
     204             :       uint   ack_delay_millis;
     205             :       int    retry;
     206             :       char   key_log_path[ PATH_MAX ];
     207             :     } quic;
     208             : 
     209             :     struct {
     210             :       ulong tcache_depth;
     211             :     } verify;
     212             : 
     213             :     struct {
     214             :       ulong tcache_depth;
     215             :     } dedup;
     216             : 
     217             :     struct {
     218             :       char  url[ 256 ];
     219             :       ulong url_len;
     220             :       char  sni[ 256 ];
     221             :       ulong sni_len;
     222             :       char  identity_key_path[ PATH_MAX ];
     223             :       char  key_log_path[ PATH_MAX ];
     224             :       ulong buf_sz;
     225             :       ulong ssl_heap_sz;
     226             :       ulong keepalive_interval_nanos;
     227             :       uchar tls_cert_verify : 1;
     228             :     } bundle;
     229             : 
     230             :     struct {
     231             :       ulong max_pending_transactions;
     232             :       ulong bank_tile_count;
     233             :       int   larger_max_cost_per_block;
     234             :       int   larger_shred_limits_per_block;
     235             :       int   use_consumed_cus;
     236             :       int   schedule_strategy;
     237             :       struct {
     238             :         int   enabled;
     239             :         uchar tip_distribution_program_addr[ 32 ];
     240             :         uchar tip_payment_program_addr[ 32 ];
     241             :         uchar tip_distribution_authority[ 32 ];
     242             :         ulong commission_bps;
     243             :         char  identity_key_path[ PATH_MAX ];
     244             :         char  vote_account_path[ PATH_MAX ]; /* or pubkey is okay */
     245             :       } bundle;
     246             :     } pack;
     247             : 
     248             :     struct {
     249             :       int   lagged_consecutive_leader_start;
     250             :       int   plugins_enabled;
     251             :       ulong bank_cnt;
     252             :       char  identity_key_path[ PATH_MAX ];
     253             :       struct {
     254             :         int   enabled;
     255             :         uchar tip_payment_program_addr[ 32 ];
     256             :         uchar tip_distribution_program_addr[ 32 ];
     257             :         char  vote_account_path[ PATH_MAX ];
     258             :       } bundle;
     259             :     } poh;
     260             : 
     261             :     struct {
     262             :       ulong             depth;
     263             :       ulong             fec_resolver_depth;
     264             :       char              identity_key_path[ PATH_MAX ];
     265             :       ushort            shred_listen_port;
     266             :       int               larger_shred_limits_per_block;
     267             :       ulong             expected_shred_version;
     268             :       ulong             adtl_dests_retransmit_cnt;
     269             :       fd_topo_ip_port_t adtl_dests_retransmit[ FD_TOPO_ADTL_DESTS_MAX ];
     270             :       ulong             adtl_dests_leader_cnt;
     271             :       fd_topo_ip_port_t adtl_dests_leader[ FD_TOPO_ADTL_DESTS_MAX ];
     272             :     } shred;
     273             : 
     274             :     struct {
     275             :       ulong disable_blockstore_from_slot;
     276             :     } store;
     277             : 
     278             :     struct {
     279             :       char   identity_key_path[ PATH_MAX ];
     280             :     } sign;
     281             : 
     282             :     struct {
     283             :       uint   listen_addr;
     284             :       ushort listen_port;
     285             : 
     286             :       int    is_voting;
     287             : 
     288             :       char   cluster[ 32 ];
     289             :       char   identity_key_path[ PATH_MAX ];
     290             :       char   vote_key_path[ PATH_MAX ];
     291             : 
     292             :       ulong  max_http_connections;
     293             :       ulong  max_websocket_connections;
     294             :       ulong  max_http_request_length;
     295             :       ulong  send_buffer_size_mb;
     296             :       int    schedule_strategy;
     297             :     } gui;
     298             : 
     299             :     struct {
     300             :       uint   prometheus_listen_addr;
     301             :       ushort prometheus_listen_port;
     302             :     } metric;
     303             : 
     304             :     struct {
     305             :       ulong fec_max;
     306             :       ulong max_vote_accounts;
     307             : 
     308             :       int   tx_metadata_storage;
     309             :       ulong funk_obj_id;
     310             :       char  funk_checkpt[ PATH_MAX ];
     311             :       char  genesis[ PATH_MAX ];
     312             :       char  slots_replayed[ PATH_MAX ];
     313             :       char  shred_cap[ PATH_MAX ];
     314             :       char  status_cache[ PATH_MAX ];
     315             :       char  cluster_version[ 32 ];
     316             :       char  tower_checkpt[ PATH_MAX ];
     317             :       int   plugins_enabled;
     318             : 
     319             :       char  identity_key_path[ PATH_MAX ];
     320             :       uint  ip_addr;
     321             :       char  vote_account_path[ PATH_MAX ];
     322             : 
     323             :       char  blockstore_file[ PATH_MAX ];
     324             :       char  blockstore_checkpt[ PATH_MAX ];
     325             : 
     326             :       /* not specified in TOML */
     327             : 
     328             :       ulong enable_features_cnt;
     329             :       char  enable_features[ 16 ][ FD_BASE58_ENCODED_32_SZ ];
     330             : 
     331             :       ulong enable_bank_hash_cmp;
     332             : 
     333             :       ulong max_exec_slices;
     334             : 
     335             :       ulong capture_start_slot;
     336             :       char  solcap_capture[ PATH_MAX ];
     337             :       char  dump_proto_dir[ PATH_MAX ];
     338             :       int   dump_block_to_pb;
     339             : 
     340             :       ulong manifest_dcache_obj_id;
     341             :     } replay;
     342             : 
     343             :     struct {
     344             :       ulong funk_obj_id;
     345             : 
     346             :       ulong capture_start_slot;
     347             :       char  dump_proto_dir[ PATH_MAX ];
     348             :       int   dump_instr_to_pb;
     349             :       int   dump_txn_to_pb;
     350             :       int   dump_syscall_to_pb;
     351             :     } exec;
     352             : 
     353             :     struct {
     354             :       ulong funk_obj_id;
     355             :     } writer;
     356             : 
     357             :     struct {
     358             :       ushort send_to_port;
     359             :       uint   send_to_ip_addr;
     360             :       ulong  conn_cnt;
     361             :       int    no_quic;
     362             :     } benchs;
     363             : 
     364             :     struct {
     365             :       ushort rpc_port;
     366             :       uint   rpc_ip_addr;
     367             :     } bencho;
     368             : 
     369             :     struct {
     370             :       ulong accounts_cnt;
     371             :       int   mode;
     372             :       float contending_fraction;
     373             :       float cu_price_spread;
     374             :     } benchg;
     375             : 
     376             :     struct {
     377             :       ushort  gossip_listen_port;
     378           0 : #     define FD_TOPO_GOSSIP_ENTRYPOINTS_MAX 16
     379             :       ulong   entrypoints_cnt;
     380             :       fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
     381             :       uint    ip_addr;
     382             :       char    identity_key_path[ PATH_MAX ];
     383             :       ushort  tvu_port;
     384             :       ushort  tpu_port;
     385             :       ushort  tpu_quic_port;
     386             :       ushort  tpu_vote_port;
     387             :       ushort  repair_serve_port;
     388             :       ulong   expected_shred_version;
     389             :     } gossip;
     390             : 
     391             :     struct {
     392             :       ushort  repair_intake_listen_port;
     393             :       ushort  repair_serve_listen_port;
     394             :       char    good_peer_cache_file[ PATH_MAX ];
     395             : 
     396             :       /* non-config */
     397             : 
     398             :       int     good_peer_cache_file_fd;
     399             :       char    identity_key_path[ PATH_MAX ];
     400             :       ulong   max_pending_shred_sets;
     401             :       ulong   slot_max;
     402             :     } repair;
     403             : 
     404             :     struct {
     405             :       char  slots_pending[PATH_MAX];
     406             : 
     407             :       ulong expected_shred_version;
     408             : 
     409             :       /* non-config */
     410             : 
     411             :       char  identity_key_path[ PATH_MAX ];
     412             :       char  shred_cap_archive[ PATH_MAX ];
     413             :       char  shred_cap_replay[ PATH_MAX ];
     414             :       ulong shred_cap_end_slot;
     415             : 
     416             :       char  blockstore_file[ PATH_MAX ];
     417             :       char  blockstore_restore[ PATH_MAX ];
     418             :     } store_int;
     419             : 
     420             :     struct {
     421             :       ushort  send_src_port;
     422             : 
     423             :       /* non-config */
     424             : 
     425             :       uint    ip_addr;
     426             :       char  identity_key_path[ PATH_MAX ];
     427             :     } send;
     428             : 
     429             :     struct {
     430             :       ulong   funk_obj_id;
     431             :       ushort  rpc_port;
     432             :       ushort  tpu_port;
     433             :       uint    tpu_ip_addr;
     434             :       char    identity_key_path[ PATH_MAX ];
     435             :       uint    block_index_max;
     436             :       uint    txn_index_max;
     437             :       uint    acct_index_max;
     438             :       char    history_file[ PATH_MAX ];
     439             :     } rpcserv;
     440             : 
     441             :     struct {
     442             :       uint fake_dst_ip;
     443             :     } pktgen;
     444             : 
     445             :     struct {
     446             :       ulong end_slot;
     447             :       char  rocksdb_path[ PATH_MAX ];
     448             :       char  shredcap_path[ PATH_MAX ];
     449             :       char  bank_hash_path[ PATH_MAX ];
     450             :       char  ingest_mode[ 32 ];
     451             : 
     452             :       /* Set internally by the archiver tile */
     453             :       int archive_fd;
     454             :     } archiver;
     455             : 
     456             :     struct {
     457             :       ulong funk_obj_id;
     458             :       char  identity_key_path[ PATH_MAX ];
     459             :       char  vote_acc_path[ PATH_MAX ];
     460             :     } tower;
     461             :     struct {
     462             :       char   folder_path[ PATH_MAX ];
     463             :       ushort repair_intake_listen_port;
     464             :       ulong   write_buffer_size; /* Size of the write buffer for the capture tile */
     465             :       int    enable_publish_stake_weights;
     466             :       char   manifest_path[ PATH_MAX ];
     467             : 
     468             :       /* Set internally by the capture tile */
     469             :       int shreds_fd;
     470             :       int requests_fd;
     471             :       int fecs_fd;
     472             :       int peers_fd;
     473             :       int bank_hashes_fd;
     474             :       int slices_fd;
     475             :     } shredcap;
     476             : 
     477             :     struct {
     478             :       char  snapshots_path[ PATH_MAX ];
     479             :       char  cluster[ 8UL ];
     480             :       int   incremental_snapshot_fetch;
     481             :       int   do_download;
     482             :       uint  maximum_local_snapshot_age;
     483             :       uint  minimum_download_speed_mib;
     484             :       uint  maximum_download_retry_abort;
     485             :     } snaprd;
     486             : 
     487             :     struct {
     488             :       ulong funk_obj_id;
     489             :     } snapin;
     490             : 
     491             :   };
     492             : };
     493             : 
     494             : typedef struct fd_topo_tile fd_topo_tile_t;
     495             : 
     496             : typedef struct {
     497             :   ulong id;
     498             :   char  name[ 13UL ];
     499             :   ulong wksp_id;
     500             : 
     501             :   ulong offset;
     502             :   ulong footprint;
     503             : } fd_topo_obj_t;
     504             : 
     505             : /* An fd_topo_t represents the overall structure of a Firedancer
     506             :    configuration, describing all the workspaces, tiles, and links
     507             :    between them. */
     508             : struct fd_topo {
     509             :   char           app_name[ 256UL ];
     510             :   uchar          props[ 16384UL ];
     511             : 
     512             :   ulong          wksp_cnt;
     513             :   ulong          link_cnt;
     514             :   ulong          tile_cnt;
     515             :   ulong          obj_cnt;
     516             : 
     517             :   fd_topo_wksp_t workspaces[ FD_TOPO_MAX_WKSPS ];
     518             :   fd_topo_link_t links[ FD_TOPO_MAX_LINKS ];
     519             :   fd_topo_tile_t tiles[ FD_TOPO_MAX_TILES ];
     520             :   fd_topo_obj_t  objs[ FD_TOPO_MAX_OBJS ];
     521             : 
     522             :   ulong          agave_affinity_cnt;
     523             :   ulong          agave_affinity_cpu_idx[ FD_TILE_MAX ];
     524             : 
     525             :   ulong          max_page_size; /* 2^21 or 2^30 */
     526             :   ulong          gigantic_page_threshold; /* see [hugetlbfs.gigantic_page_threshold_mib]*/
     527             : };
     528             : typedef struct fd_topo fd_topo_t;
     529             : 
     530             : typedef struct {
     531             :   char const * name;
     532             : 
     533             :   int          keep_host_networking;
     534             :   int          allow_connect;
     535             :   ulong        rlimit_file_cnt;
     536             :   ulong        rlimit_address_space;
     537             :   ulong        rlimit_data;
     538             :   int          for_tpool;
     539             : 
     540             :   ulong (*populate_allowed_seccomp)( fd_topo_t const * topo, fd_topo_tile_t const * tile, ulong out_cnt, struct sock_filter * out );
     541             :   ulong (*populate_allowed_fds    )( fd_topo_t const * topo, fd_topo_tile_t const * tile, ulong out_fds_sz, int * out_fds );
     542             :   ulong (*scratch_align           )( void );
     543             :   ulong (*scratch_footprint       )( fd_topo_tile_t const * tile );
     544             :   ulong (*loose_footprint         )( fd_topo_tile_t const * tile );
     545             :   void  (*privileged_init         )( fd_topo_t * topo, fd_topo_tile_t * tile );
     546             :   void  (*unprivileged_init       )( fd_topo_t * topo, fd_topo_tile_t * tile );
     547             :   void  (*run                     )( fd_topo_t * topo, fd_topo_tile_t * tile );
     548             :   ulong (*rlimit_file_cnt_fn      )( fd_topo_t const * topo, fd_topo_tile_t const * tile );
     549             : } fd_topo_run_tile_t;
     550             : 
     551             : struct fd_topo_obj_callbacks {
     552             :   char const * name;
     553             :   ulong (* footprint )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
     554             :   ulong (* align     )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
     555             :   ulong (* loose     )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
     556             :   void  (* new       )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
     557             : };
     558             : 
     559             : typedef struct fd_topo_obj_callbacks fd_topo_obj_callbacks_t;
     560             : 
     561             : FD_PROTOTYPES_BEGIN
     562             : 
     563             : FD_FN_CONST static inline ulong
     564           0 : fd_topo_workspace_align( void ) {
     565             :   /* This needs to be the max( align ) of all the child members that
     566             :      could be aligned into this workspace, otherwise our footprint
     567             :      calculation will not be correct.  For now just set to 4096 but this
     568             :      should probably be calculated dynamically, or we should reduce
     569             :      those child aligns if we can. */
     570           0 :   return 4096UL;
     571           0 : }
     572             : 
     573             : static inline void *
     574             : fd_topo_obj_laddr( fd_topo_t const * topo,
     575          24 :                    ulong             obj_id ) {
     576          24 :   fd_topo_obj_t const * obj = &topo->objs[ obj_id ];
     577          24 :   FD_TEST( obj_id<FD_TOPO_MAX_OBJS );
     578          24 :   FD_TEST( obj->id == obj_id );
     579          24 :   FD_TEST( obj->offset );
     580          24 :   return (void *)((ulong)topo->workspaces[ obj->wksp_id ].wksp + obj->offset);
     581          24 : }
     582             : 
     583             : FD_FN_PURE static inline ulong
     584             : fd_topo_tile_name_cnt( fd_topo_t const * topo,
     585           0 :                        char const *      name ) {
     586           0 :   ulong cnt = 0;
     587           0 :   for( ulong i=0; i<topo->tile_cnt; i++ ) {
     588           0 :     if( FD_UNLIKELY( !strcmp( topo->tiles[ i ].name, name ) ) ) cnt++;
     589           0 :   }
     590           0 :   return cnt;
     591           0 : }
     592             : 
     593             : /* Finds the workspace of a given name in the topology.  Returns
     594             :    ULONG_MAX if there is no such workspace.  There can be at most one
     595             :    workspace of a given name. */
     596             : 
     597             : FD_FN_PURE static inline ulong
     598             : fd_topo_find_wksp( fd_topo_t const * topo,
     599          39 :                    char const *      name ) {
     600          39 :   for( ulong i=0; i<topo->wksp_cnt; i++ ) {
     601          39 :     if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, name ) ) ) return i;
     602          39 :   }
     603           0 :   return ULONG_MAX;
     604          39 : }
     605             : 
     606             : /* Find the tile of a given name and kind_id in the topology, there will
     607             :    be at most one such tile, since kind_id is unique among the name.
     608             :    Returns ULONG_MAX if there is no such tile. */
     609             : 
     610             : FD_FN_PURE static inline ulong
     611             : fd_topo_find_tile( fd_topo_t const * topo,
     612             :                    char const *      name,
     613           6 :                    ulong             kind_id ) {
     614           6 :   for( ulong i=0; i<topo->tile_cnt; i++ ) {
     615           6 :     if( FD_UNLIKELY( !strcmp( topo->tiles[ i ].name, name ) ) && topo->tiles[ i ].kind_id == kind_id ) return i;
     616           6 :   }
     617           0 :   return ULONG_MAX;
     618           6 : }
     619             : 
     620             : /* Find the link of a given name and kind_id in the topology, there will
     621             :    be at most one such link, since kind_id is unique among the name.
     622             :    Returns ULONG_MAX if there is no such link. */
     623             : 
     624             : FD_FN_PURE static inline ulong
     625             : fd_topo_find_link( fd_topo_t const * topo,
     626             :                    char const *      name,
     627           6 :                    ulong             kind_id ) {
     628           9 :   for( ulong i=0; i<topo->link_cnt; i++ ) {
     629           9 :     if( FD_UNLIKELY( !strcmp( topo->links[ i ].name, name ) ) && topo->links[ i ].kind_id == kind_id ) return i;
     630           9 :   }
     631           0 :   return ULONG_MAX;
     632           6 : }
     633             : 
     634             : FD_FN_PURE static inline ulong
     635             : fd_topo_find_tile_in_link( fd_topo_t const *      topo,
     636             :                            fd_topo_tile_t const * tile,
     637             :                            char const *           name,
     638           0 :                            ulong                  kind_id ) {
     639           0 :   for( ulong i=0; i<tile->in_cnt; i++ ) {
     640           0 :     if( FD_UNLIKELY( !strcmp( topo->links[ tile->in_link_id[ i ] ].name, name ) )
     641           0 :         && topo->links[ tile->in_link_id[ i ] ].kind_id == kind_id ) return i;
     642           0 :   }
     643           0 :   return ULONG_MAX;
     644           0 : }
     645             : 
     646             : FD_FN_PURE static inline ulong
     647             : fd_topo_find_tile_out_link( fd_topo_t const *      topo,
     648             :                             fd_topo_tile_t const * tile,
     649             :                             char const *           name,
     650           0 :                             ulong                  kind_id ) {
     651           0 :   for( ulong i=0; i<tile->out_cnt; i++ ) {
     652           0 :     if( FD_UNLIKELY( !strcmp( topo->links[ tile->out_link_id[ i ] ].name, name ) )
     653           0 :         && topo->links[ tile->out_link_id[ i ] ].kind_id == kind_id ) return i;
     654           0 :   }
     655           0 :   return ULONG_MAX;
     656           0 : }
     657             : 
     658             : /* Find the id of the tile which is a producer for the given link.  If
     659             :    no tile is a producer for the link, returns ULONG_MAX.  This should
     660             :    not be possible for a well formed and validated topology.  */
     661             : FD_FN_PURE static inline ulong
     662             : fd_topo_find_link_producer( fd_topo_t const *      topo,
     663           0 :                             fd_topo_link_t const * link ) {
     664           0 :   for( ulong i=0; i<topo->tile_cnt; i++ ) {
     665           0 :     fd_topo_tile_t const * tile = &topo->tiles[ i ];
     666             : 
     667           0 :     for( ulong j=0; j<tile->out_cnt; j++ ) {
     668           0 :       if( FD_UNLIKELY( tile->out_link_id[ j ] == link->id ) ) return i;
     669           0 :     }
     670           0 :   }
     671           0 :   return ULONG_MAX;
     672           0 : }
     673             : 
     674             : /* Given a link, count the number of consumers of that link among all
     675             :    the tiles in the topology. */
     676             : FD_FN_PURE static inline ulong
     677             : fd_topo_link_consumer_cnt( fd_topo_t const *      topo,
     678           0 :                            fd_topo_link_t const * link ) {
     679           0 :   ulong cnt = 0;
     680           0 :   for( ulong i=0; i<topo->tile_cnt; i++ ) {
     681           0 :     fd_topo_tile_t const * tile = &topo->tiles[ i ];
     682           0 :     for( ulong j=0; j<tile->in_cnt; j++ ) {
     683           0 :       if( FD_UNLIKELY( tile->in_link_id[ j ] == link->id ) ) cnt++;
     684           0 :     }
     685           0 :   }
     686             : 
     687           0 :   return cnt;
     688           0 : }
     689             : 
     690             : /* Given a link, count the number of reliable consumers of that link
     691             :    among all the tiles in the topology. */
     692             : FD_FN_PURE static inline ulong
     693             : fd_topo_link_reliable_consumer_cnt( fd_topo_t const *      topo,
     694           0 :                                     fd_topo_link_t const * link ) {
     695           0 :   ulong cnt = 0;
     696           0 :   for( ulong i=0; i<topo->tile_cnt; i++ ) {
     697           0 :     fd_topo_tile_t const * tile = &topo->tiles[ i ];
     698           0 :     for( ulong j=0; j<tile->in_cnt; j++ ) {
     699           0 :       if( FD_UNLIKELY( tile->in_link_id[ j ] == link->id && tile->in_link_reliable[ j ] ) ) cnt++;
     700           0 :     }
     701           0 :   }
     702           0 : 
     703           0 :   return cnt;
     704           0 : }
     705             : 
     706             : FD_FN_PURE static inline ulong
     707             : fd_topo_tile_consumer_cnt( fd_topo_t const *      topo,
     708           0 :                            fd_topo_tile_t const * tile ) {
     709           0 :   (void)topo;
     710           0 :   return tile->out_cnt;
     711           0 : }
     712             : 
     713             : FD_FN_PURE static inline ulong
     714             : fd_topo_tile_reliable_consumer_cnt( fd_topo_t const *      topo,
     715           0 :                                     fd_topo_tile_t const * tile ) {
     716           0 :   ulong reliable_cons_cnt = 0UL;
     717           0 :   for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     718           0 :     fd_topo_tile_t const * consumer_tile = &topo->tiles[ i ];
     719           0 :     for( ulong j=0UL; j<consumer_tile->in_cnt; j++ ) {
     720           0 :       for( ulong k=0UL; k<tile->out_cnt; k++ ) {
     721           0 :         if( FD_UNLIKELY( consumer_tile->in_link_id[ j ]==tile->out_link_id[ k ] && consumer_tile->in_link_reliable[ j ] ) ) {
     722           0 :           reliable_cons_cnt++;
     723           0 :         }
     724           0 :       }
     725           0 :     }
     726           0 :   }
     727           0 :   return reliable_cons_cnt;
     728           0 : }
     729             : 
     730             : FD_FN_PURE static inline ulong
     731             : fd_topo_tile_producer_cnt( fd_topo_t const *     topo,
     732           0 :                            fd_topo_tile_t const * tile ) {
     733           0 :   (void)topo;
     734           0 :   ulong in_cnt = 0UL;
     735           0 :   for( ulong i=0UL; i<tile->in_cnt; i++ ) {
     736           0 :     if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
     737           0 :     in_cnt++;
     738           0 :   }
     739           0 :   return in_cnt;
     740           0 : }
     741             : 
     742             : /* Join (map into the process) all shared memory (huge/gigantic pages)
     743             :    needed by the tile, in the given topology.  All memory associated
     744             :    with the tile (aka. used by links that the tile either produces to or
     745             :    consumes from, or used by the tile itself for its cnc) will be
     746             :    attached (mapped into the process).
     747             : 
     748             :    This is needed to play nicely with the sandbox.  Once a process is
     749             :    sandboxed we can no longer map any memory. */
     750             : void
     751             : fd_topo_join_tile_workspaces( fd_topo_t *      topo,
     752             :                               fd_topo_tile_t * tile );
     753             : 
     754             : /* Join (map into the process) the shared memory (huge/gigantic pages)
     755             :    for the given workspace.  Mode is one of
     756             :    FD_SHMEM_JOIN_MODE_READ_WRITE or FD_SHMEM_JOIN_MODE_READ_ONLY and
     757             :    determines the prot argument that will be passed to mmap when mapping
     758             :    the pages in (PROT_WRITE or PROT_READ respectively). */
     759             : void
     760             : fd_topo_join_workspace( fd_topo_t *      topo,
     761             :                         fd_topo_wksp_t * wksp,
     762             :                         int              mode );
     763             : 
     764             : /* Join (map into the process) all shared memory (huge/gigantic pages)
     765             :    needed by all tiles in the topology.  Mode is one of
     766             :    FD_SHMEM_JOIN_MODE_READ_WRITE or FD_SHMEM_JOIN_MODE_READ_ONLY and
     767             :    determines the prot argument that will be passed to mmap when
     768             :    mapping the pages in (PROT_WRITE or PROT_READ respectively). */
     769             : void
     770             : fd_topo_join_workspaces( fd_topo_t *  topo,
     771             :                          int          mode );
     772             : 
     773             : /* Leave (unmap from the process) the shared memory needed for the
     774             :    given workspace in the topology, if it was previously mapped.
     775             : 
     776             :    topo and wksp are assumed non-NULL.  It is OK if the workspace
     777             :    has not been previously joined, in which case this is a no-op. */
     778             : 
     779             : void
     780             : fd_topo_leave_workspace( fd_topo_t *      topo,
     781             :                          fd_topo_wksp_t * wksp );
     782             : 
     783             : /* Leave (unmap from the process) all shared memory needed by all
     784             :    tiles in the topology, if each of them was mapped.
     785             : 
     786             :    topo is assumed non-NULL.  Only workspaces which were previously
     787             :    joined are unmapped. */
     788             : 
     789             : void
     790             : fd_topo_leave_workspaces( fd_topo_t * topo );
     791             : 
     792             : /* Create the given workspace needed by the topology on the system.
     793             :    This does not "join" the workspaces (map their memory into the
     794             :    process), but only creates the .wksp file and formats it correctly
     795             :    as a workspace.
     796             : 
     797             :    Returns 0 on success and -1 on failure, with errno set to the error.
     798             :    The only reason for failure currently that will be returned is
     799             :    ENOMEM, as other unexpected errors will cause the program to exit.
     800             : 
     801             :    If update_existing is 1, the workspace will not be created from
     802             :    scratch but it will be assumed that it already exists from a prior
     803             :    run and needs to be maybe resized and then have the header
     804             :    structures reinitialized.  This can save a very expensive operation
     805             :    of zeroing all of the workspace pages.  This is dangerous in
     806             :    production because it can leave stray memory from prior runs around,
     807             :    and should only be used in development environments. */
     808             : 
     809             : int
     810             : fd_topo_create_workspace( fd_topo_t *      topo,
     811             :                           fd_topo_wksp_t * wksp,
     812             :                           int              update_existing );
     813             : 
     814             : /* Join the standard IPC objects needed by the topology of this particular
     815             :    tile */
     816             : 
     817             : void
     818             : fd_topo_fill_tile( fd_topo_t *      topo,
     819             :                    fd_topo_tile_t * tile );
     820             : 
     821             : /* Same as fd_topo_fill_tile but fills in all the objects for a
     822             :    particular workspace. */
     823             : void
     824             : fd_topo_workspace_fill( fd_topo_t *      topo,
     825             :                         fd_topo_wksp_t * wksp );
     826             : 
     827             : /* Apply a new function to every object that is resident in the given
     828             :    workspace in the topology. */
     829             : 
     830             : void
     831             : fd_topo_wksp_new( fd_topo_t const *          topo,
     832             :                   fd_topo_wksp_t const *     wksp,
     833             :                   fd_topo_obj_callbacks_t ** callbacks );
     834             : 
     835             : /* Same as fd_topo_fill_tile but fills in all tiles in the topology. */
     836             : 
     837             : void
     838             : fd_topo_fill( fd_topo_t * topo );
     839             : 
     840             : /* fd_topo_tile_stack_join joins a huge page optimized stack for the
     841             :    provided tile.  The stack is assumed to already exist at a known
     842             :    path in the hugetlbfs mount. */
     843             : 
     844             : void *
     845             : fd_topo_tile_stack_join( char const * app_name,
     846             :                          char const * tile_name,
     847             :                          ulong        tile_kind_id );
     848             : 
     849             : /* Install the XDP program needed by the net tiles into the local device
     850             :    and return the xsk_map_fd.  bind_addr is an optional IPv4 address to
     851             :    use for filtering by dst IP. */
     852             : 
     853             : fd_xdp_fds_t
     854             : fd_topo_install_xdp( fd_topo_t const * topo,
     855             :                      uint              bind_addr );
     856             : 
     857             : /* fd_topo_run_single_process runs all the tiles in a single process
     858             :    (the calling process).  This spawns a thread for each tile, switches
     859             :    that thread to the given UID and GID and then runs the tile in it.
     860             :    Each thread will never exit, as tiles are expected to run forever.
     861             :    An error is logged and the application will exit if a tile exits.
     862             :    The function itself does return after spawning all the threads.
     863             : 
     864             :    The threads will not be sandboxed in any way, except switching to the
     865             :    provided UID and GID, so they will share the same address space, and
     866             :    not have any seccomp restrictions or use any Linux namespaces.  The
     867             :    calling thread will also switch to the provided UID and GID before
     868             :    it returns.
     869             : 
     870             :    In production, when running with an Agave child process this is
     871             :    used for spawning certain tiles inside the Agave address space.
     872             :    It's also useful for tooling and debugging, but is not how the main
     873             :    production Firedancer process runs.  For production, each tile is run
     874             :    in its own address space with a separate process and full security
     875             :    sandbox.
     876             : 
     877             :    The agave argument determines which tiles are started.  If the
     878             :    argument is 0 or 1, only non-agave (or only agave) tiles are started.
     879             :    If the argument is any other value, all tiles in the topology are
     880             :    started regardless of if they are Agave tiles or not. */
     881             : 
     882             : void
     883             : fd_topo_run_single_process( fd_topo_t *       topo,
     884             :                             int               agave,
     885             :                             uint              uid,
     886             :                             uint              gid,
     887             :                             fd_topo_run_tile_t (* tile_run )( fd_topo_tile_t const * tile ) );
     888             : 
     889             : /* fd_topo_run_tile runs the given tile directly within the current
     890             :    process (and thread).  The function will never return, as tiles are
     891             :    expected to run forever.  An error is logged and the application will
     892             :    exit if the tile exits.
     893             : 
     894             :    The sandbox argument determines if the current process will be
     895             :    sandboxed fully before starting the tile.  The thread will switch to
     896             :    the UID and GID provided before starting the tile, even if the thread
     897             :    is not being sandboxed.  Although POSIX specifies that all threads in
     898             :    a process must share a UID and GID, this is not the case on Linux.
     899             :    The thread will switch to the provided UID and GID without switching
     900             :    the other threads in the process.
     901             : 
     902             :    If keep_controlling_terminal is set to 0, and the sandbox is enabled
     903             :    the controlling terminal will be detached as an additional sandbox
     904             :    measure, but you will not be able to send Ctrl+C or other signals
     905             :    from the terminal.  See fd_sandbox.h for more information.
     906             : 
     907             :    The allow_fd argument is only used if sandbox is true, and is a file
     908             :    descriptor which will be allowed to exist in the process.  Normally
     909             :    the sandbox code rejects and aborts if there is an unexpected file
     910             :    descriptor present on boot.  This is helpful to allow a parent
     911             :    process to be notified on termination of the tile by waiting for a
     912             :    pipe file descriptor to get closed.
     913             : 
     914             :    wait and debugger are both used in debugging.  If wait is non-NULL,
     915             :    the runner will wait until the value pointed to by wait is non-zero
     916             :    before launching the tile.  Likewise, if debugger is non-NULL, the
     917             :    runner will wait until a debugger is attached before setting the
     918             :    value pointed to by debugger to non-zero.  These are intended to be
     919             :    used as a pair, where many tiles share a waiting reference, and then
     920             :    one of the tiles (a tile you want to attach the debugger to) has the
     921             :    same reference provided as the debugger, so all tiles will stop and
     922             :    wait for the debugger to attach to it before proceeding. */
     923             : 
     924             : void
     925             : fd_topo_run_tile( fd_topo_t *          topo,
     926             :                   fd_topo_tile_t *     tile,
     927             :                   int                  sandbox,
     928             :                   int                  keep_controlling_terminal,
     929             :                   int                  dumpable,
     930             :                   uint                 uid,
     931             :                   uint                 gid,
     932             :                   int                  allow_fd,
     933             :                   volatile int *       wait,
     934             :                   volatile int *       debugger,
     935             :                   fd_topo_run_tile_t * tile_run );
     936             : 
     937             : /* This is for determining the value of RLIMIT_MEMLOCK that we need to
     938             :    successfully run all tiles in separate processes.  The value returned
     939             :    is the maximum amount of memory that will be locked with mlock() by
     940             :    any individual process in the tree.  Specifically, if we have three
     941             :    tile processes, and they each need to lock 5, 9, and 2 MiB of memory
     942             :    respectively, RLIMIT_MEMLOCK needs to be 9 MiB to allow all three
     943             :    process mlock() calls to succeed.
     944             : 
     945             :    Tiles lock memory in three ways.  Any workspace they are using, they
     946             :    lock the entire workspace.  Then each tile uses huge pages for the
     947             :    stack which are also locked, and finally some tiles use private
     948             :    locked mmaps outside the workspace for storing key material.  The
     949             :    results here include all of this memory together.
     950             : 
     951             :    The result is not necessarily the amount of memory used by the tile
     952             :    process, although it will be quite close.  Tiles could potentially
     953             :    allocate memory (eg, with brk) without needing to lock it, which
     954             :    would not need to be included, and some kernel memory that tiles cause
     955             :    to be allocated (for example XSK buffers) is also not included.  The
     956             :    actual amount of memory used will not be less than this value. */
     957             : FD_FN_PURE ulong
     958             : fd_topo_mlock_max_tile( fd_topo_t const * topo );
     959             : 
     960             : /* Same as fd_topo_mlock_max_tile, but for loading the entire topology
     961             :    into one process, rather than a separate process per tile.  This is
     962             :    used, for example, by the configuration code when it creates all the
     963             :    workspaces, or the monitor that maps the entire system into one
     964             :    address space. */
     965             : FD_FN_PURE ulong
     966             : fd_topo_mlock( fd_topo_t const * topo );
     967             : 
     968             : /* This returns the number of gigantic pages needed by the topology on
     969             :    the provided numa node.  It includes pages needed by the workspaces,
     970             :    as well as additional allocations like huge pages for process stacks
     971             :    and private key storage. */
     972             : 
     973             : FD_FN_PURE ulong
     974             : fd_topo_gigantic_page_cnt( fd_topo_t const * topo,
     975             :                            ulong             numa_idx );
     976             : 
     977             : /* This returns the number of huge pages in the application needed by
     978             :    the topology on the provided numa node.  It includes pages needed by
     979             :    things placed in the hugetlbfs (workspaces, process stacks).  If
     980             :    include_anonymous is true, it also includes anonymous hugepages which
     981             :    are needed but are not placed in the hugetlbfs. */
     982             : 
     983             : FD_FN_PURE ulong
     984             : fd_topo_huge_page_cnt( fd_topo_t const * topo,
     985             :                        ulong             numa_idx,
     986             :                        int               include_anonymous );
     987             : 
     988             : /* Prints a message describing the topology to an output stream.  If
     989             :    stdout is true, will be written to stdout, otherwise will be written
     990             :    as a NOTICE log message to the log file. */
     991             : void
     992             : fd_topo_print_log( int         stdout,
     993             :                    fd_topo_t * topo );
     994             : 
     995             : FD_PROTOTYPES_END
     996             : 
     997             : #endif /* HEADER_fd_src_disco_topo_fd_topo_h */

Generated by: LCOV version 1.14