Line data Source code
1 : #ifndef HEADER_fd_src_disco_topo_fd_topo_h
2 : #define HEADER_fd_src_disco_topo_fd_topo_h
3 :
4 : #include "../stem/fd_stem.h"
5 : #include "../../tango/fd_tango.h"
6 : #include "../../waltz/xdp/fd_xdp1.h"
7 : #include "../../ballet/base58/fd_base58.h"
8 : #include "../../util/net/fd_net_headers.h"
9 :
10 : /* Maximum number of workspaces that may be present in a topology. */
11 : #define FD_TOPO_MAX_WKSPS (256UL)
12 : /* Maximum number of links that may be present in a topology. */
13 : #define FD_TOPO_MAX_LINKS (256UL)
14 : /* Maximum number of tiles that may be present in a topology. */
15 0 : #define FD_TOPO_MAX_TILES (256UL)
16 : /* Maximum number of objects that may be present in a topology. */
17 : #define FD_TOPO_MAX_OBJS (4096UL)
18 : /* Maximum number of links that may go into any one tile in the
19 : topology. */
20 : #define FD_TOPO_MAX_TILE_IN_LINKS ( 128UL)
21 : /* Maximum number of links that a tile may write to. */
22 : #define FD_TOPO_MAX_TILE_OUT_LINKS ( 32UL)
23 : /* Maximum number of objects that a tile can use. */
24 : #define FD_TOPO_MAX_TILE_OBJS ( 256UL)
25 :
26 : /* Maximum number of additional source IP addresses. */
27 : #define FD_NET_MAX_SRC_ADDR 4
28 :
29 : /* Maximum number of additional destinations for leader shreds and for retransmitted shreds */
30 : #define FD_TOPO_ADTL_DESTS_MAX ( 32UL)
31 :
32 :
33 : /* A workspace is a Firedancer specific memory management structure that
34 : sits on top of 1 or more memory mapped gigantic or huge pages mounted
35 : to the hugetlbfs. */
36 : typedef struct {
37 : ulong id; /* The ID of this workspace. Indexed from [0, wksp_cnt). When placed in a topology, the ID must be the index of the workspace in the workspaces list. */
38 : char name[ 13UL ]; /* The name of this workspace, like "pack". There can be at most one of each workspace name in a topology. */
39 :
40 : ulong numa_idx; /* The index of the NUMA node on the system that this workspace should be allocated from. */
41 :
42 : ulong min_part_max; /* Artificially raise part_max */
43 : ulong min_loose_sz; /* Artificially raise loose footprint */
44 :
45 : /* Computed fields. These are not supplied as configuration but calculated as needed. */
46 : struct {
47 : ulong page_sz; /* The size of the pages that this workspace is backed by. One of FD_PAGE_SIZE_*. */
48 : ulong page_cnt; /* The number of pages that must be mapped to this workspace to store all the data needed by consumers. */
49 : ulong part_max; /* The maximum number of partitions in the underlying workspace. There can only be this many allocations made at any one time. */
50 :
51 : fd_wksp_t * wksp; /* The workspace memory in the local process. */
52 : ulong known_footprint; /* Total size in bytes of all data in Firedancer that will be stored in this workspace at startup. */
53 : ulong total_footprint; /* Total size in bytes of all data in Firedancer that could be stored in this workspace, includes known data and loose data. */
54 : };
55 : } fd_topo_wksp_t;
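
/* Illustrative sketch (not part of this API): once a topology has been
   laid out and joined, the computed fields above can be read directly.
   Assumes a populated fd_topo_t named topo (declared later in this
   header) and the fd_util logging macros.

     for( ulong i=0UL; i<topo.wksp_cnt; i++ ) {
       fd_topo_wksp_t const * wksp = &topo.workspaces[ i ];
       FD_LOG_NOTICE(( "wksp %s: %lu pages of %lu bytes, total footprint %lu",
                       wksp->name, wksp->page_cnt, wksp->page_sz,
                       wksp->total_footprint ));
     }
*/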
56 :
57 : /* A link is an mcache in a workspace that has one producer and one or
58 :    more consumers. A link may optionally also have a dcache, which holds
59 : fragments referred to by the mcache entries.
60 :
61 : A link belongs to exactly one workspace. A link has exactly one
62 : producer, and 1 or more consumers. Each consumer is either reliable
63 :    or not reliable. A link has a depth and an MTU, which correspond to
64 :    the depth and MTU of the mcache and dcache respectively. An MTU of
65 : zero means no dcache is needed, as there is no data. */
66 : typedef struct {
67 : ulong id; /* The ID of this link. Indexed from [0, link_cnt). When placed in a topology, the ID must be the index of the link in the links list. */
68 : char name[ 13UL ]; /* The name of this link, like "pack_bank". There can be multiple of each link name in a topology. */
69 : ulong kind_id; /* The ID of this link within its name. If there are N links of a particular name, they have IDs [0, N). The pair (name, kind_id) uniquely identifies a link, as does "id" on its own. */
70 :
71 : ulong depth; /* The depth of the mcache representing the link. */
72 : ulong mtu; /* The MTU of data fragments in the mcache. A value of 0 means there is no dcache. */
73 :   ulong burst; /* The maximum number of MTU-sized data fragments that might be burst to the dcache. */
74 :
75 : ulong mcache_obj_id;
76 : ulong dcache_obj_id;
77 :
78 : /* Computed fields. These are not supplied as configuration but calculated as needed. */
79 : struct {
80 : fd_frag_meta_t * mcache; /* The mcache of this link. */
81 : void * dcache; /* The dcache of this link, if it has one. */
82 : };
83 :
84 : uint permit_no_consumers : 1; /* Permit a topology where this link has no consumers */
85 : uint permit_no_producers : 1; /* Permit a topology where this link has no producers */
86 : } fd_topo_link_t;
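
/* Illustrative sketch (assumes a populated fd_topo_t named topo): a
   link with mtu==0 carries no data and has no dcache.

     for( ulong i=0UL; i<topo.link_cnt; i++ ) {
       fd_topo_link_t const * link = &topo.links[ i ];
       if( !link->mtu )
         FD_LOG_NOTICE(( "link %s:%lu has no dcache (metadata only)",
                         link->name, link->kind_id ));
     }
*/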
87 :
88 : /* Be careful: ip and port are in different byte orders. */
89 : typedef struct {
90 : uint ip; /* in network byte order */
91 : ushort port; /* in host byte order */
92 : } fd_topo_ip_port_t;
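
/* Illustrative sketch of the byte order caveat above: ip can be copied
   into a sockaddr as-is while port must be converted with htons.
   Assumes <netinet/in.h> / <arpa/inet.h>; ip_net_order is a placeholder
   for a value already in network byte order.

     fd_topo_ip_port_t dest = { .ip = ip_net_order, .port = 8001 };
     struct sockaddr_in sa = {0};
     sa.sin_family      = AF_INET;
     sa.sin_addr.s_addr = dest.ip;            // already network byte order
     sa.sin_port        = htons( dest.port ); // host -> network byte order
*/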
93 :
94 : struct fd_topo_net_tile {
95 : ulong umem_dcache_obj_id; /* dcache for XDP UMEM frames */
96 : uint bind_address;
97 :
98 : ushort shred_listen_port;
99 : ushort quic_transaction_listen_port;
100 : ushort legacy_transaction_listen_port;
101 : ushort gossip_listen_port;
102 : ushort repair_intake_listen_port;
103 : ushort repair_serve_listen_port;
104 : ushort send_src_port;
105 : };
106 : typedef struct fd_topo_net_tile fd_topo_net_tile_t;
107 :
108 : /* A tile is a unique process that is spawned by Firedancer to represent
109 : one thread of execution. Firedancer sandboxes all tiles to their own
110 : process for security reasons.
111 :
112 : A tile belongs to exactly one workspace. A tile is a consumer of 0
113 :    or more links, its inputs. A tile is a producer of 0 or more output
114 : links.
115 :
116 : All input links will be automatically polled by the tile
117 : infrastructure, and output links will automatically source and manage
118 : credits from consumers. */
119 : struct fd_topo_tile {
120 : ulong id; /* The ID of this tile. Indexed from [0, tile_cnt). When placed in a topology, the ID must be the index of the tile in the tiles list. */
121 : char name[ 7UL ]; /* The name of this tile. There can be multiple of each tile name in a topology. */
122 : char metrics_name[ 10UL ]; /* The name of this tile for looking up metrics. This is used so tiles can share a name but report different metrics, for Frankendancer and Firedancer. */
123 :   ulong kind_id; /* The ID of this tile within its name. If there are N tiles of a particular name, they have IDs [0, N). The pair (name, kind_id) uniquely identifies a tile, as does "id" on its own. */
124 : int is_agave; /* If the tile needs to run in the Agave (Anza) address space or not. */
125 : int allow_shutdown; /* If the tile is allowed to shutdown gracefully. If false, when the tile exits it will tear down the entire application. */
126 :
127 : ulong cpu_idx; /* The CPU index to pin the tile on. A value of ULONG_MAX or more indicates the tile should be floating and not pinned to a core. */
128 :
129 : ulong in_cnt; /* The number of links that this tile reads from. */
130 : ulong in_link_id[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* The link_id of each link that this tile reads from, indexed in [0, in_cnt). */
131 :   int in_link_reliable[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* Whether this tile is a reliable or an unreliable consumer of each link it reads from, indexed in [0, in_cnt). */
132 :   int in_link_poll[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* Whether each link that this tile reads from should be polled by the tile infrastructure, indexed in [0, in_cnt).
133 : If the link is not polled, the tile will not receive frags for it and the tile writer is responsible for
134 : reading from the link. The link must be marked as unreliable as it is not flow controlled. */
135 :
136 : ulong out_cnt; /* The number of links that this tile writes to. */
137 :   ulong out_link_id[ FD_TOPO_MAX_TILE_OUT_LINKS ]; /* The link_id of each link that this tile writes to, indexed in [0, out_cnt). */
138 :
139 : ulong tile_obj_id;
140 : ulong metrics_obj_id;
141 : ulong keyswitch_obj_id;
142 : ulong in_link_fseq_obj_id[ FD_TOPO_MAX_TILE_IN_LINKS ];
143 :
144 : ulong uses_obj_cnt;
145 : ulong uses_obj_id[ FD_TOPO_MAX_TILE_OBJS ];
146 : int uses_obj_mode[ FD_TOPO_MAX_TILE_OBJS ];
147 :
148 : /* Computed fields. These are not supplied as configuration but calculated as needed. */
149 : struct {
150 :     ulong * metrics; /* The shared memory for metrics that this tile should write. Consumed by monitoring and metrics-writing tiles. */
151 :
152 : /* The fseq of each link that this tile reads from. Multiple fseqs
153 : may point to the link, if there are multiple consumers. An fseq
154 : can be uniquely identified via (link_id, tile_id), or (link_kind,
155 : link_kind_id, tile_kind, tile_kind_id) */
156 : ulong * in_link_fseq[ FD_TOPO_MAX_TILE_IN_LINKS ];
157 : };
158 :
159 : /* Configuration fields. These are required to be known by the topology so it can determine the
160 : total size of Firedancer in memory. */
161 : union {
162 : fd_topo_net_tile_t net;
163 :
164 : struct {
165 : fd_topo_net_tile_t net;
166 :
167 : char if_virt[ 16 ]; /* device name (virtual, for routing) */
168 : char if_phys[ 16 ]; /* device name (physical, for RX/TX) */
169 : uint if_queue; /* device queue index */
170 :
171 : /* xdp specific options */
172 : ulong xdp_rx_queue_size;
173 : ulong xdp_tx_queue_size;
174 : ulong free_ring_depth;
175 : long tx_flush_timeout_ns;
176 : char xdp_mode[8];
177 : int zero_copy;
178 :
179 : ulong netdev_dbl_buf_obj_id; /* dbl_buf containing netdev_tbl */
180 : ulong fib4_main_obj_id; /* fib4 containing main route table */
181 : ulong fib4_local_obj_id; /* fib4 containing local route table */
182 : ulong neigh4_obj_id; /* neigh4 hash map */
183 : } xdp;
184 :
185 : struct {
186 : fd_topo_net_tile_t net;
187 : /* sock specific options */
188 : int so_sndbuf;
189 : int so_rcvbuf;
190 : } sock;
191 :
192 : struct {
193 : ulong netdev_dbl_buf_obj_id; /* dbl_buf containing netdev_tbl */
194 : ulong fib4_main_obj_id; /* fib4 containing main route table */
195 : ulong fib4_local_obj_id; /* fib4 containing local route table */
196 : char neigh_if[ 16 ]; /* neigh4 interface name */
197 : ulong neigh4_obj_id; /* neigh4 hash map */
198 : } netlink;
199 :
200 0 : #define FD_TOPO_GOSSIP_ENTRYPOINTS_MAX 16UL
201 :
202 : struct {
203 : char identity_key_path[ PATH_MAX ];
204 :
205 : ulong entrypoints_cnt;
206 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
207 :
208 : long boot_timestamp_nanos;
209 :
210 : ulong tcache_depth;
211 :
212 : ushort shred_version;
213 : int allow_private_address;
214 : } gossvf;
215 :
216 : struct {
217 : char identity_key_path[ PATH_MAX ];
218 :
219 : ulong entrypoints_cnt;
220 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
221 :
222 : long boot_timestamp_nanos;
223 :
224 : uint ip_addr;
225 : ushort shred_version;
226 :
227 : ulong max_entries;
228 : ulong max_purged;
229 : ulong max_failed;
230 :
231 : struct {
232 : ushort gossip;
233 : ushort tvu;
234 : ushort tvu_quic;
235 : ushort tpu;
236 : ushort tpu_quic;
237 : ushort repair;
238 : } ports;
239 : } gossip;
240 :
241 : struct {
242 : uint out_depth;
243 : uint reasm_cnt;
244 : ulong max_concurrent_connections;
245 : ulong max_concurrent_handshakes;
246 : ushort quic_transaction_listen_port;
247 : long idle_timeout_millis;
248 : uint ack_delay_millis;
249 : int retry;
250 : char key_log_path[ PATH_MAX ];
251 : } quic;
252 :
253 : struct {
254 : ulong tcache_depth;
255 : } verify;
256 :
257 : struct {
258 : ulong tcache_depth;
259 : } dedup;
260 :
261 : struct {
262 : char url[ 256 ];
263 : ulong url_len;
264 : char sni[ 256 ];
265 : ulong sni_len;
266 : char identity_key_path[ PATH_MAX ];
267 : char key_log_path[ PATH_MAX ];
268 : ulong buf_sz;
269 : ulong ssl_heap_sz;
270 : ulong keepalive_interval_nanos;
271 : uchar tls_cert_verify : 1;
272 : } bundle;
273 :
274 : struct {
275 : ulong max_pending_transactions;
276 : ulong bank_tile_count;
277 : int larger_max_cost_per_block;
278 : int larger_shred_limits_per_block;
279 : int use_consumed_cus;
280 : int schedule_strategy;
281 : struct {
282 : int enabled;
283 : uchar tip_distribution_program_addr[ 32 ];
284 : uchar tip_payment_program_addr[ 32 ];
285 : uchar tip_distribution_authority[ 32 ];
286 : ulong commission_bps;
287 : char identity_key_path[ PATH_MAX ];
288 : char vote_account_path[ PATH_MAX ]; /* or pubkey is okay */
289 : } bundle;
290 : } pack;
291 :
292 : struct {
293 : int lagged_consecutive_leader_start;
294 : int plugins_enabled;
295 : ulong bank_cnt;
296 : char identity_key_path[ PATH_MAX ];
297 : struct {
298 : int enabled;
299 : uchar tip_payment_program_addr[ 32 ];
300 : uchar tip_distribution_program_addr[ 32 ];
301 : char vote_account_path[ PATH_MAX ];
302 : } bundle;
303 : } poh;
304 :
305 : struct {
306 : ulong depth;
307 : ulong fec_resolver_depth;
308 : char identity_key_path[ PATH_MAX ];
309 : ushort shred_listen_port;
310 : int larger_shred_limits_per_block;
311 : ushort expected_shred_version;
312 : ulong adtl_dests_retransmit_cnt;
313 : fd_topo_ip_port_t adtl_dests_retransmit[ FD_TOPO_ADTL_DESTS_MAX ];
314 : ulong adtl_dests_leader_cnt;
315 : fd_topo_ip_port_t adtl_dests_leader[ FD_TOPO_ADTL_DESTS_MAX ];
316 : } shred;
317 :
318 : struct {
319 : ulong disable_blockstore_from_slot;
320 : } store;
321 :
322 : struct {
323 : char identity_key_path[ PATH_MAX ];
324 : } sign;
325 :
326 : struct {
327 : uint listen_addr;
328 : ushort listen_port;
329 :
330 : int is_voting;
331 :
332 : char cluster[ 32 ];
333 : char identity_key_path[ PATH_MAX ];
334 : char vote_key_path[ PATH_MAX ];
335 :
336 : ulong max_http_connections;
337 : ulong max_websocket_connections;
338 : ulong max_http_request_length;
339 : ulong send_buffer_size_mb;
340 : int schedule_strategy;
341 :
342 : int websocket_compression;
343 : int frontend_release_channel;
344 : } gui;
345 :
346 : struct {
347 : uint listen_addr;
348 : ushort listen_port;
349 :
350 : ulong max_http_connections;
351 : ulong send_buffer_size_mb;
352 : ulong max_http_request_length;
353 :
354 : ulong max_live_slots;
355 :
356 : char identity_key_path[ PATH_MAX ];
357 : } rpc;
358 :
359 : struct {
360 : uint prometheus_listen_addr;
361 : ushort prometheus_listen_port;
362 : } metric;
363 :
364 : struct {
365 : ulong fec_max;
366 : ulong max_vote_accounts;
367 :
368 : ulong funk_obj_id;
369 : ulong txncache_obj_id;
370 : ulong progcache_obj_id;
371 :
372 : char shred_cap[ PATH_MAX ];
373 :
374 : char identity_key_path[ PATH_MAX ];
375 : uint ip_addr;
376 : char vote_account_path[ PATH_MAX ];
377 :
378 : ushort expected_shred_version;
379 :
380 : ulong heap_size_gib;
381 : ulong max_live_slots;
382 :
383 : /* not specified in TOML */
384 :
385 : ulong enable_features_cnt;
386 : char enable_features[ 16 ][ FD_BASE58_ENCODED_32_SZ ];
387 :
388 : char genesis_path[ PATH_MAX ];
389 :
390 : int larger_max_cost_per_block;
391 :
392 : ulong capture_start_slot;
393 : char solcap_capture[ PATH_MAX ];
394 : char dump_proto_dir[ PATH_MAX ];
395 : int dump_block_to_pb;
396 :
397 : struct {
398 : int enabled;
399 : uchar tip_payment_program_addr[ 32 ];
400 : uchar tip_distribution_program_addr[ 32 ];
401 : char vote_account_path[ PATH_MAX ];
402 : } bundle;
403 :
404 : } replay;
405 :
406 : struct {
407 : ulong funk_obj_id;
408 : ulong txncache_obj_id;
409 : ulong progcache_obj_id;
410 :
411 : ulong max_live_slots;
412 :
413 : ulong capture_start_slot;
414 : char solcap_capture[ PATH_MAX ];
415 : char dump_proto_dir[ PATH_MAX ];
416 : int dump_instr_to_pb;
417 : int dump_txn_to_pb;
418 : int dump_syscall_to_pb;
419 : int dump_elf_to_pb;
420 : } exec;
421 :
422 : struct {
423 : ushort send_to_port;
424 : uint send_to_ip_addr;
425 : ulong conn_cnt;
426 : int no_quic;
427 : } benchs;
428 :
429 : struct {
430 : ushort rpc_port;
431 : uint rpc_ip_addr;
432 : } bencho;
433 :
434 : struct {
435 : ulong accounts_cnt;
436 : int mode;
437 : float contending_fraction;
438 : float cu_price_spread;
439 : } benchg;
440 :
441 : struct {
442 : ushort repair_intake_listen_port;
443 : ushort repair_serve_listen_port;
444 : char identity_key_path[ PATH_MAX ];
445 : ulong max_pending_shred_sets;
446 : ulong slot_max;
447 :
448 : /* non-config */
449 :
450 : ulong repair_sign_depth;
451 : ulong repair_sign_cnt;
452 :
453 : ulong end_slot; /* repair profiler mode only */
454 : } repair;
455 :
456 : struct {
457 : char slots_pending[PATH_MAX];
458 :
459 : ulong expected_shred_version;
460 :
461 : /* non-config */
462 :
463 : char identity_key_path[ PATH_MAX ];
464 : char shred_cap_archive[ PATH_MAX ];
465 : char shred_cap_replay[ PATH_MAX ];
466 : ulong shred_cap_end_slot;
467 :
468 : char blockstore_file[ PATH_MAX ];
469 : char blockstore_restore[ PATH_MAX ];
470 : } store_int;
471 :
472 : struct {
473 : ushort send_src_port;
474 :
475 : /* non-config */
476 :
477 : uint ip_addr;
478 : char identity_key_path[ PATH_MAX ];
479 : } send;
480 :
481 : struct {
482 : uint fake_dst_ip;
483 : } pktgen;
484 :
485 : struct {
486 : ulong end_slot;
487 : char rocksdb_path[ PATH_MAX ];
488 : char ingest_mode[ 32 ];
489 :
490 : /* Set internally by the archiver tile */
491 : int archive_fd;
492 : } archiver;
493 :
494 : struct {
495 : ulong end_slot;
496 : char rocksdb_path[ PATH_MAX ];
497 : char shredcap_path[ PATH_MAX ];
498 : } backtest;
499 :
500 : struct {
501 : int hard_fork_fatal;
502 : ulong max_live_slots;
503 : ulong max_vote_lookahead;
504 : char identity_key[ PATH_MAX ];
505 : char vote_account[ PATH_MAX ];
506 : char base_path[PATH_MAX];
507 : } tower;
508 :
509 : struct {
510 : char folder_path[ PATH_MAX ];
511 : ushort repair_intake_listen_port;
512 : ulong write_buffer_size; /* Size of the write buffer for the capture tile */
513 : int enable_publish_stake_weights;
514 : char manifest_path[ PATH_MAX ];
515 :
516 : /* Set internally by the capture tile */
517 : int shreds_fd;
518 : int requests_fd;
519 : int fecs_fd;
520 : int peers_fd;
521 : int bank_hashes_fd;
522 : int slices_fd;
523 : } shredcap;
524 :
525 : #define FD_TOPO_SNAPSHOTS_GOSSIP_LIST_MAX (32UL)
526 0 : #define FD_TOPO_SNAPSHOTS_SERVERS_MAX (16UL)
527 0 : #define FD_TOPO_MAX_RESOLVED_ADDRS ( 4UL)
528 0 : #define FD_TOPO_SNAPSHOTS_SERVERS_MAX_RESOLVED (FD_TOPO_MAX_RESOLVED_ADDRS*FD_TOPO_SNAPSHOTS_SERVERS_MAX)
529 :
530 : struct fd_topo_tile_snapct {
531 : char snapshots_path[ PATH_MAX ];
532 :
533 : struct {
534 : uint max_local_full_effective_age;
535 : uint max_local_incremental_age;
536 :
537 : struct {
538 : int allow_any;
539 : ulong allow_list_cnt;
540 : fd_pubkey_t allow_list[ FD_TOPO_SNAPSHOTS_GOSSIP_LIST_MAX ];
541 : ulong block_list_cnt;
542 : fd_pubkey_t block_list[ FD_TOPO_SNAPSHOTS_GOSSIP_LIST_MAX ];
543 : } gossip;
544 :
545 : ulong servers_cnt;
546 : struct {
547 : fd_ip4_port_t addr;
548 : char hostname[ 256UL ];
549 : int is_https;
550 : } servers[ FD_TOPO_SNAPSHOTS_SERVERS_MAX_RESOLVED ];
551 : } sources;
552 :
553 : int incremental_snapshots;
554 : uint max_full_snapshots_to_keep;
555 : uint max_incremental_snapshots_to_keep;
556 : uint full_effective_age_cancel_threshold;
557 : } snapct;
558 :
559 : struct {
560 : char snapshots_path[ PATH_MAX ];
561 : } snapld;
562 :
563 : struct {
564 : ulong max_live_slots;
565 : ulong funk_obj_id;
566 : ulong txncache_obj_id;
567 :
568 : uint lthash_disabled : 1;
569 : uint use_vinyl : 1;
570 : ulong vinyl_meta_map_obj_id;
571 : ulong vinyl_meta_pool_obj_id;
572 : ulong snapwr_depth;
573 : char vinyl_path[ PATH_MAX ];
574 : } snapin;
575 :
576 : struct {
577 : ulong dcache_obj_id;
578 : char vinyl_path[ PATH_MAX ];
579 : } snapwr;
580 :
581 : struct {
582 :
583 : uint bind_address;
584 : ushort bind_port;
585 :
586 : ushort expected_shred_version;
587 : ulong entrypoints_cnt;
588 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
589 : } ipecho;
590 :
591 : struct {
592 : ulong max_live_slots;
593 : ulong txncache_obj_id;
594 : ulong funk_obj_id;
595 : ulong progcache_obj_id;
596 : } bank;
597 :
598 : struct {
599 : ulong funk_obj_id;
600 : } resolv;
601 :
602 : struct {
603 : ulong funk_obj_id;
604 :
605 : int allow_download;
606 :
607 : ushort expected_shred_version;
608 : ulong entrypoints_cnt;
609 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
610 :
611 : int has_expected_genesis_hash;
612 : uchar expected_genesis_hash[ 32UL ];
613 :
614 : char genesis_path[ PATH_MAX ];
615 :
616 : uint target_gid;
617 : uint target_uid;
618 : } genesi;
619 :
620 : struct {
621 : ulong vinyl_meta_map_obj_id;
622 : ulong vinyl_meta_pool_obj_id;
623 : ulong vinyl_line_max;
624 : ulong vinyl_cnc_obj_id; /* optional */
625 : ulong vinyl_data_obj_id;
626 : char vinyl_bstream_path[ PATH_MAX ];
627 :
628 : int io_type; /* FD_VINYL_IO_TYPE_* */
629 : uint uring_depth;
630 : } vinyl;
631 :
632 : struct {
633 : ulong capture_start_slot;
634 : char solcap_capture[ PATH_MAX ];
635 : int recent_only;
636 : ulong recent_slots_per_file;
637 : } capctx;
638 : };
639 : };
640 :
641 : typedef struct fd_topo_tile fd_topo_tile_t;
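
/* Illustrative sketch (assumes a populated fd_topo_t named topo and a
   valid tile index tile_idx): walking a tile's input links and the
   per-link flags described above.

     fd_topo_tile_t const * tile = &topo.tiles[ tile_idx ];
     for( ulong i=0UL; i<tile->in_cnt; i++ ) {
       fd_topo_link_t const * link = &topo.links[ tile->in_link_id[ i ] ];
       FD_LOG_NOTICE(( "in link %s:%lu reliable=%d polled=%d",
                       link->name, link->kind_id,
                       tile->in_link_reliable[ i ], tile->in_link_poll[ i ] ));
     }
*/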
642 :
643 : typedef struct {
644 : ulong id;
645 : char name[ 13UL ];
646 : ulong wksp_id;
647 :
648 : ulong offset;
649 : ulong footprint;
650 : } fd_topo_obj_t;
651 :
652 : /* An fd_topo_t represents the overall structure of a Firedancer
653 : configuration, describing all the workspaces, tiles, and links
654 : between them. */
655 : struct fd_topo {
656 : char app_name[ 256UL ];
657 : uchar props[ 16384UL ];
658 :
659 : ulong wksp_cnt;
660 : ulong link_cnt;
661 : ulong tile_cnt;
662 : ulong obj_cnt;
663 :
664 : fd_topo_wksp_t workspaces[ FD_TOPO_MAX_WKSPS ];
665 : fd_topo_link_t links[ FD_TOPO_MAX_LINKS ];
666 : fd_topo_tile_t tiles[ FD_TOPO_MAX_TILES ];
667 : fd_topo_obj_t objs[ FD_TOPO_MAX_OBJS ];
668 :
669 : ulong agave_affinity_cnt;
670 : ulong agave_affinity_cpu_idx[ FD_TILE_MAX ];
671 :
672 : ulong max_page_size; /* 2^21 or 2^30 */
673 : ulong gigantic_page_threshold; /* see [hugetlbfs.gigantic_page_threshold_mib]*/
674 : };
675 : typedef struct fd_topo fd_topo_t;
676 :
677 : typedef struct {
678 : char const * name;
679 :
680 : int keep_host_networking;
681 : int allow_connect;
682 : int allow_renameat;
683 : ulong rlimit_file_cnt;
684 : ulong rlimit_address_space;
685 : ulong rlimit_data;
686 : int for_tpool;
687 :
688 : ulong (*populate_allowed_seccomp)( fd_topo_t const * topo, fd_topo_tile_t const * tile, ulong out_cnt, struct sock_filter * out );
689 : ulong (*populate_allowed_fds )( fd_topo_t const * topo, fd_topo_tile_t const * tile, ulong out_fds_sz, int * out_fds );
690 : ulong (*scratch_align )( void );
691 : ulong (*scratch_footprint )( fd_topo_tile_t const * tile );
692 : ulong (*loose_footprint )( fd_topo_tile_t const * tile );
693 : void (*privileged_init )( fd_topo_t * topo, fd_topo_tile_t * tile );
694 : void (*unprivileged_init )( fd_topo_t * topo, fd_topo_tile_t * tile );
695 : void (*run )( fd_topo_t * topo, fd_topo_tile_t * tile );
696 : ulong (*rlimit_file_cnt_fn )( fd_topo_t const * topo, fd_topo_tile_t const * tile );
697 : } fd_topo_run_tile_t;
698 :
699 : struct fd_topo_obj_callbacks {
700 : char const * name;
701 : ulong (* footprint )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
702 : ulong (* align )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
703 : ulong (* loose )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
704 : void (* new )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
705 : };
706 :
707 : typedef struct fd_topo_obj_callbacks fd_topo_obj_callbacks_t;
708 :
709 : FD_PROTOTYPES_BEGIN
710 :
711 : FD_FN_CONST static inline ulong
712 0 : fd_topo_workspace_align( void ) {
713 : /* This needs to be the max( align ) of all the child members that
714 : could be aligned into this workspace, otherwise our footprint
715 : calculation will not be correct. For now just set to 4096 but this
716 : should probably be calculated dynamically, or we should reduce
717 : those child aligns if we can. */
718 0 : return 4096UL;
719 0 : }
720 :
721 : void *
722 : fd_topo_obj_laddr( fd_topo_t const * topo,
723 : ulong obj_id );
724 :
725 : /* Returns a pointer in the local address space to the base address of
726 : the workspace out of which the given object was allocated. */
727 :
728 : static inline void *
729 : fd_topo_obj_wksp_base( fd_topo_t const * topo,
730 0 : ulong obj_id ) {
731 0 : FD_TEST( obj_id<FD_TOPO_MAX_OBJS );
732 0 : fd_topo_obj_t const * obj = &topo->objs[ obj_id ];
733 0 : FD_TEST( obj->id == obj_id );
734 0 : ulong const wksp_id = obj->wksp_id;
735 :
736 0 : FD_TEST( wksp_id<FD_TOPO_MAX_WKSPS );
737 0 : fd_topo_wksp_t const * wksp = &topo->workspaces[ wksp_id ];
738 0 : FD_TEST( wksp->id == wksp_id );
739 0 : return wksp->wksp;
740 0 : }
741 :
742 : FD_FN_PURE static inline ulong
743 : fd_topo_tile_name_cnt( fd_topo_t const * topo,
744 3 : char const * name ) {
745 3 : ulong cnt = 0;
746 6 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
747 3 : if( FD_UNLIKELY( !strcmp( topo->tiles[ i ].name, name ) ) ) cnt++;
748 3 : }
749 3 : return cnt;
750 3 : }
751 :
752 : /* Finds the workspace of a given name in the topology. Returns
753 : ULONG_MAX if there is no such workspace. There can be at most one
754 : workspace of a given name. */
755 :
756 : FD_FN_PURE static inline ulong
757 : fd_topo_find_wksp( fd_topo_t const * topo,
758 66 : char const * name ) {
759 66 : for( ulong i=0; i<topo->wksp_cnt; i++ ) {
760 66 : if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, name ) ) ) return i;
761 66 : }
762 0 : return ULONG_MAX;
763 66 : }
764 :
765 : /* Find the tile of a given name and kind_id in the topology. There will
766 :    be at most one such tile, since kind_id is unique within a name.
767 :    Returns ULONG_MAX if there is no such tile. */
768 :
769 : FD_FN_PURE static inline ulong
770 : fd_topo_find_tile( fd_topo_t const * topo,
771 : char const * name,
772 21 : ulong kind_id ) {
773 21 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
774 21 : if( FD_UNLIKELY( !strcmp( topo->tiles[ i ].name, name ) ) && topo->tiles[ i ].kind_id == kind_id ) return i;
775 21 : }
776 0 : return ULONG_MAX;
777 21 : }
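
/* Illustrative usage sketch for the find helpers (assumes a populated
   fd_topo_t named topo; the "pack" names are examples and not present
   in every topology):

     ulong wksp_id = fd_topo_find_wksp( &topo, "pack" );
     ulong tile_id = fd_topo_find_tile( &topo, "pack", 0UL );
     if( FD_UNLIKELY( wksp_id==ULONG_MAX || tile_id==ULONG_MAX ) )
       FD_LOG_ERR(( "topology has no pack workspace or pack tile" ));
*/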
778 :
779 : /* Find the link of a given name and kind_id in the topology. There will
780 :    be at most one such link, since kind_id is unique within a name.
781 :    Returns ULONG_MAX if there is no such link. */
782 :
783 : FD_FN_PURE static inline ulong
784 : fd_topo_find_link( fd_topo_t const * topo,
785 : char const * name,
786 18 : ulong kind_id ) {
787 39 : for( ulong i=0; i<topo->link_cnt; i++ ) {
788 39 : if( FD_UNLIKELY( !strcmp( topo->links[ i ].name, name ) ) && topo->links[ i ].kind_id == kind_id ) return i;
789 39 : }
790 0 : return ULONG_MAX;
791 18 : }
792 :
793 : FD_FN_PURE static inline ulong
794 : fd_topo_find_tile_in_link( fd_topo_t const * topo,
795 : fd_topo_tile_t const * tile,
796 : char const * name,
797 0 : ulong kind_id ) {
798 0 : for( ulong i=0; i<tile->in_cnt; i++ ) {
799 0 : if( FD_UNLIKELY( !strcmp( topo->links[ tile->in_link_id[ i ] ].name, name ) )
800 0 : && topo->links[ tile->in_link_id[ i ] ].kind_id == kind_id ) return i;
801 0 : }
802 0 : return ULONG_MAX;
803 0 : }
804 :
805 : FD_FN_PURE static inline ulong
806 : fd_topo_find_tile_out_link( fd_topo_t const * topo,
807 : fd_topo_tile_t const * tile,
808 : char const * name,
809 0 : ulong kind_id ) {
810 0 : for( ulong i=0; i<tile->out_cnt; i++ ) {
811 0 : if( FD_UNLIKELY( !strcmp( topo->links[ tile->out_link_id[ i ] ].name, name ) )
812 0 : && topo->links[ tile->out_link_id[ i ] ].kind_id == kind_id ) return i;
813 0 : }
814 0 : return ULONG_MAX;
815 0 : }
816 :
817 : /* Find the id of the tile which is a producer for the given link. If
818 : no tile is a producer for the link, returns ULONG_MAX. This should
819 :    not be possible for a well-formed and validated topology. */
820 : FD_FN_PURE static inline ulong
821 : fd_topo_find_link_producer( fd_topo_t const * topo,
822 0 : fd_topo_link_t const * link ) {
823 0 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
824 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
825 :
826 0 : for( ulong j=0; j<tile->out_cnt; j++ ) {
827 0 : if( FD_UNLIKELY( tile->out_link_id[ j ] == link->id ) ) return i;
828 0 : }
829 0 : }
830 0 : return ULONG_MAX;
831 0 : }
832 :
833 : /* Given a link, count the number of consumers of that link among all
834 : the tiles in the topology. */
835 : FD_FN_PURE static inline ulong
836 : fd_topo_link_consumer_cnt( fd_topo_t const * topo,
837 0 : fd_topo_link_t const * link ) {
838 0 : ulong cnt = 0;
839 0 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
840 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
841 0 : for( ulong j=0; j<tile->in_cnt; j++ ) {
842 0 : if( FD_UNLIKELY( tile->in_link_id[ j ] == link->id ) ) cnt++;
843 0 : }
844 0 : }
845 :
846 0 : return cnt;
847 0 : }
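
/* Illustrative sketch: a minimal validation pass over links, in the
   spirit of the permit_no_producers / permit_no_consumers flags above.
   Assumes a populated fd_topo_t named topo.

     for( ulong i=0UL; i<topo.link_cnt; i++ ) {
       fd_topo_link_t const * link = &topo.links[ i ];
       if( !link->permit_no_producers && ULONG_MAX==fd_topo_find_link_producer( &topo, link ) )
         FD_LOG_ERR(( "link %s:%lu has no producer", link->name, link->kind_id ));
       if( !link->permit_no_consumers && !fd_topo_link_consumer_cnt( &topo, link ) )
         FD_LOG_ERR(( "link %s:%lu has no consumers", link->name, link->kind_id ));
     }
*/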
848 :
849 : /* Given a link, count the number of reliable consumers of that link
850 : among all the tiles in the topology. */
851 : FD_FN_PURE static inline ulong
852 : fd_topo_link_reliable_consumer_cnt( fd_topo_t const * topo,
853 0 : fd_topo_link_t const * link ) {
854 0 : ulong cnt = 0;
855 0 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
856 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
857 0 : for( ulong j=0; j<tile->in_cnt; j++ ) {
858 0 : if( FD_UNLIKELY( tile->in_link_id[ j ] == link->id && tile->in_link_reliable[ j ] ) ) cnt++;
859 0 : }
860 0 : }
861 :
862 0 : return cnt;
863 0 : }
864 :
865 : FD_FN_PURE static inline ulong
866 : fd_topo_tile_consumer_cnt( fd_topo_t const * topo,
867 0 : fd_topo_tile_t const * tile ) {
868 0 : (void)topo;
869 0 : return tile->out_cnt;
870 0 : }
871 :
872 : FD_FN_PURE static inline ulong
873 : fd_topo_tile_reliable_consumer_cnt( fd_topo_t const * topo,
874 0 : fd_topo_tile_t const * tile ) {
875 0 : ulong reliable_cons_cnt = 0UL;
876 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
877 0 : fd_topo_tile_t const * consumer_tile = &topo->tiles[ i ];
878 0 : for( ulong j=0UL; j<consumer_tile->in_cnt; j++ ) {
879 0 : for( ulong k=0UL; k<tile->out_cnt; k++ ) {
880 0 : if( FD_UNLIKELY( consumer_tile->in_link_id[ j ]==tile->out_link_id[ k ] && consumer_tile->in_link_reliable[ j ] ) ) {
881 0 : reliable_cons_cnt++;
882 0 : }
883 0 : }
884 0 : }
885 0 : }
886 0 : return reliable_cons_cnt;
887 0 : }
888 :
889 : FD_FN_PURE static inline ulong
890 : fd_topo_tile_producer_cnt( fd_topo_t const * topo,
891 0 : fd_topo_tile_t const * tile ) {
892 0 : (void)topo;
893 0 : ulong in_cnt = 0UL;
894 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
895 0 : if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
896 0 : in_cnt++;
897 0 : }
898 0 : return in_cnt;
899 0 : }
900 :
901 : /* Join (map into the process) all shared memory (huge/gigantic pages)
902 : needed by the tile, in the given topology. All memory associated
903 :    with the tile (i.e. used by links that the tile either produces to or
904 : consumes from, or used by the tile itself for its cnc) will be
905 : attached (mapped into the process).
906 :
907 : This is needed to play nicely with the sandbox. Once a process is
908 : sandboxed we can no longer map any memory. */
909 : void
910 : fd_topo_join_tile_workspaces( fd_topo_t * topo,
911 : fd_topo_tile_t * tile );
912 :
913 : /* Join (map into the process) the shared memory (huge/gigantic pages)
914 : for the given workspace. Mode is one of
915 : FD_SHMEM_JOIN_MODE_READ_WRITE or FD_SHMEM_JOIN_MODE_READ_ONLY and
916 : determines the prot argument that will be passed to mmap when mapping
917 : the pages in (PROT_WRITE or PROT_READ respectively). */
918 : void
919 : fd_topo_join_workspace( fd_topo_t * topo,
920 : fd_topo_wksp_t * wksp,
921 : int mode );
922 :
923 : /* Join (map into the process) all shared memory (huge/gigantic pages)
924 : needed by all tiles in the topology. Mode is one of
925 : FD_SHMEM_JOIN_MODE_READ_WRITE or FD_SHMEM_JOIN_MODE_READ_ONLY and
926 : determines the prot argument that will be passed to mmap when
927 : mapping the pages in (PROT_WRITE or PROT_READ respectively). */
928 : void
929 : fd_topo_join_workspaces( fd_topo_t * topo,
930 : int mode );
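
/* Illustrative sketch: a monitoring or inspection process that never
   writes shared memory would join everything read-only and leave it
   when done (topo is an assumed, populated fd_topo_t):

     fd_topo_join_workspaces( &topo, FD_SHMEM_JOIN_MODE_READ_ONLY );
     // ... read mcaches, metrics, etc. ...
     fd_topo_leave_workspaces( &topo );
*/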
931 :
932 : /* Leave (unmap from the process) the shared memory needed for the
933 : given workspace in the topology, if it was previously mapped.
934 :
935 : topo and wksp are assumed non-NULL. It is OK if the workspace
936 : has not been previously joined, in which case this is a no-op. */
937 :
938 : void
939 : fd_topo_leave_workspace( fd_topo_t * topo,
940 : fd_topo_wksp_t * wksp );
941 :
942 : /* Leave (unmap from the process) all shared memory needed by all
943 : tiles in the topology, if each of them was mapped.
944 :
945 : topo is assumed non-NULL. Only workspaces which were previously
946 : joined are unmapped. */
947 :
948 : void
949 : fd_topo_leave_workspaces( fd_topo_t * topo );
950 :
951 : /* Create the given workspace needed by the topology on the system.
952 : This does not "join" the workspaces (map their memory into the
953 : process), but only creates the .wksp file and formats it correctly
954 : as a workspace.
955 :
956 : Returns 0 on success and -1 on failure, with errno set to the error.
957 :    Currently the only failure that will be returned is ENOMEM; any
958 :    other unexpected error will cause the program to exit.
959 :
960 : If update_existing is 1, the workspace will not be created from
961 : scratch but it will be assumed that it already exists from a prior
962 : run and needs to be maybe resized and then have the header
963 : structures reinitialized. This can save a very expensive operation
964 : of zeroing all of the workspace pages. This is dangerous in
965 : production because it can leave stray memory from prior runs around,
966 : and should only be used in development environments. */
967 :
968 : int
969 : fd_topo_create_workspace( fd_topo_t * topo,
970 : fd_topo_wksp_t * wksp,
971 : int update_existing );
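
/* Illustrative usage sketch (assumes <errno.h> and a populated
   fd_topo_t named topo): create every workspace from scratch, treating
   ENOMEM as a user-facing configuration error.

     for( ulong i=0UL; i<topo.wksp_cnt; i++ ) {
       if( FD_UNLIKELY( -1==fd_topo_create_workspace( &topo, &topo.workspaces[ i ], 0 ) ) ) {
         FD_TEST( errno==ENOMEM );
         FD_LOG_ERR(( "not enough huge/gigantic pages to create workspace %s",
                      topo.workspaces[ i ].name ));
       }
     }
*/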
972 :
973 : /* Join the standard IPC objects needed by this particular tile in the
974 :    given topology. */
975 :
976 : void
977 : fd_topo_fill_tile( fd_topo_t * topo,
978 : fd_topo_tile_t * tile );
979 :
980 : /* Same as fd_topo_fill_tile but fills in all the objects for a
981 :    particular workspace. */
982 : void
983 : fd_topo_workspace_fill( fd_topo_t * topo,
984 : fd_topo_wksp_t * wksp );
985 :
986 : /* Apply the `new` callback to every object that is resident in the
987 :    given workspace in the topology. */
988 :
989 : void
990 : fd_topo_wksp_new( fd_topo_t const * topo,
991 : fd_topo_wksp_t const * wksp,
992 : fd_topo_obj_callbacks_t ** callbacks );
993 :
994 : /* Same as fd_topo_fill_tile but fills in all tiles in the topology. */
995 :
996 : void
997 : fd_topo_fill( fd_topo_t * topo );
998 :
999 : /* fd_topo_tile_stack_join joins a huge page optimized stack for the
1000 : provided tile. The stack is assumed to already exist at a known
1001 : path in the hugetlbfs mount. */
1002 :
1003 : void *
1004 : fd_topo_tile_stack_join( char const * app_name,
1005 : char const * tile_name,
1006 : ulong tile_kind_id );
1007 :
1008 : /* fd_topo_run_single_process runs all the tiles in a single process
1009 : (the calling process). This spawns a thread for each tile, switches
1010 : that thread to the given UID and GID and then runs the tile in it.
1011 : Each thread will never exit, as tiles are expected to run forever.
1012 : An error is logged and the application will exit if a tile exits.
1013 : The function itself does return after spawning all the threads.
1014 :
1015 : The threads will not be sandboxed in any way, except switching to the
1016 : provided UID and GID, so they will share the same address space, and
1017 : not have any seccomp restrictions or use any Linux namespaces. The
1018 : calling thread will also switch to the provided UID and GID before
1019 : it returns.
1020 :
1021 : In production, when running with an Agave child process this is
1022 : used for spawning certain tiles inside the Agave address space.
1023 : It's also useful for tooling and debugging, but is not how the main
1024 : production Firedancer process runs. For production, each tile is run
1025 : in its own address space with a separate process and full security
1026 : sandbox.
1027 :
1028 : The agave argument determines which tiles are started. If the
1029 : argument is 0 or 1, only non-agave (or only agave) tiles are started.
1030 :    If the argument is any other value, all tiles in the topology are
1031 :    started regardless of whether they are Agave tiles or not. */
1032 :
1033 : void
1034 : fd_topo_run_single_process( fd_topo_t * topo,
1035 : int agave,
1036 : uint uid,
1037 : uint gid,
1038 : fd_topo_run_tile_t (* tile_run )( fd_topo_tile_t const * tile ) );
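
/* Illustrative sketch of the agave argument (uid, gid and tile_run_cb
   are assumed to come from the caller): 0 starts only the non-Agave
   tiles, 1 starts only the Agave tiles, and any other value (e.g. 2)
   starts everything.

     fd_topo_run_single_process( &topo, 0, uid, gid, tile_run_cb );
*/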
1039 :
1040 : /* fd_topo_run_tile runs the given tile directly within the current
1041 : process (and thread). The function will never return, as tiles are
1042 : expected to run forever. An error is logged and the application will
1043 : exit if the tile exits.
1044 :
1045 : The sandbox argument determines if the current process will be
1046 : sandboxed fully before starting the tile. The thread will switch to
1047 : the UID and GID provided before starting the tile, even if the thread
1048 : is not being sandboxed. Although POSIX specifies that all threads in
1049 : a process must share a UID and GID, this is not the case on Linux.
1050 : The thread will switch to the provided UID and GID without switching
1051 : the other threads in the process.
1052 :
1053 : If keep_controlling_terminal is set to 0, and the sandbox is enabled
1054 : the controlling terminal will be detached as an additional sandbox
1055 : measure, but you will not be able to send Ctrl+C or other signals
1056 : from the terminal. See fd_sandbox.h for more information.
1057 :
1058 : The allow_fd argument is only used if sandbox is true, and is a file
1059 : descriptor which will be allowed to exist in the process. Normally
1060 : the sandbox code rejects and aborts if there is an unexpected file
1061 : descriptor present on boot. This is helpful to allow a parent
1062 : process to be notified on termination of the tile by waiting for a
1063 : pipe file descriptor to get closed.
1064 :
1065 : wait and debugger are both used in debugging. If wait is non-NULL,
1066 : the runner will wait until the value pointed to by wait is non-zero
1067 : before launching the tile. Likewise, if debugger is non-NULL, the
1068 : runner will wait until a debugger is attached before setting the
1069 : value pointed to by debugger to non-zero. These are intended to be
1070 : used as a pair, where many tiles share a waiting reference, and then
1071 : one of the tiles (a tile you want to attach the debugger to) has the
1072 : same reference provided as the debugger, so all tiles will stop and
1073 : wait for the debugger to attach to it before proceeding. */
1074 :
1075 : void
1076 : fd_topo_run_tile( fd_topo_t * topo,
1077 : fd_topo_tile_t * tile,
1078 : int sandbox,
1079 : int keep_controlling_terminal,
1080 : int dumpable,
1081 : uint uid,
1082 : uint gid,
1083 : int allow_fd,
1084 : volatile int * wait,
1085 : volatile int * debugger,
1086 : fd_topo_run_tile_t * tile_run );
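
/* Illustrative sketch of the wait/debugger pairing described above.
   Each call happens in its own tile process; debug_tile, other_tile,
   uid, gid and the run configs are assumed to come from the caller.
   All tiles share one flag; the runner of the tile being debugged waits
   for a debugger to attach and then sets the flag, releasing the rest.

     static volatile int started = 0;

     // tile to be debugged: waits for a debugger, then sets started
     fd_topo_run_tile( &topo, debug_tile, 1, 1, 0, uid, gid, -1, &started, &started, &debug_run );

     // every other tile: blocks until started becomes non-zero
     fd_topo_run_tile( &topo, other_tile, 1, 1, 0, uid, gid, -1, &started, NULL, &other_run );
*/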
1087 :
1088 : /* This is for determining the value of RLIMIT_MLOCK that we need to
1089 : successfully run all tiles in separate processes. The value returned
1090 : is the maximum amount of memory that will be locked with mlock() by
1091 : any individual process in the tree. Specifically, if we have three
1092 : tile processes, and they each need to lock 5, 9, and 2 MiB of memory
1093 : respectively, RLIMIT_MLOCK needs to be 9 MiB to allow all three
1094 : process mlock() calls to succeed.
1095 :
1096 :    Tiles lock memory in three ways. Each workspace a tile uses is locked
1097 :    in its entirety. Each tile also uses huge pages for its stack, which
1098 :    are locked. Finally, some tiles use private locked mmaps outside the
1099 :    workspace for storing key material. The result here includes all of
1100 :    this memory together.
1101 :
1102 : The result is not necessarily the amount of memory used by the tile
1103 : process, although it will be quite close. Tiles could potentially
1104 :    allocate memory (e.g. with brk) without needing to lock it, which
1105 :    would not need to be included, and some kernel memory that tiles cause
1106 : to be allocated (for example XSK buffers) is also not included. The
1107 : actual amount of memory used will not be less than this value. */
1108 : FD_FN_PURE ulong
1109 : fd_topo_mlock_max_tile( fd_topo_t const * topo );
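
/* Illustrative sketch: raising the memlock limit before spawning tile
   processes.  Assumes <sys/resource.h>; RLIMIT_MEMLOCK is the Linux
   resource that governs mlock()ed bytes.

     ulong mlock_max = fd_topo_mlock_max_tile( &topo );
     struct rlimit rl = { .rlim_cur = mlock_max, .rlim_max = mlock_max };
     if( FD_UNLIKELY( setrlimit( RLIMIT_MEMLOCK, &rl ) ) )
       FD_LOG_ERR(( "setrlimit(RLIMIT_MEMLOCK) failed" ));
*/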
1110 :
1111 : /* Same as fd_topo_mlock_max_tile, but for loading the entire topology
1112 : into one process, rather than a separate process per tile. This is
1113 : used, for example, by the configuration code when it creates all the
1114 : workspaces, or the monitor that maps the entire system into one
1115 : address space. */
1116 : FD_FN_PURE ulong
1117 : fd_topo_mlock( fd_topo_t const * topo );
1118 :
1119 : /* This returns the number of gigantic pages needed by the topology on
1120 : the provided numa node. It includes pages needed by the workspaces,
1121 : as well as additional allocations like huge pages for process stacks
1122 : and private key storage. */
1123 :
1124 : FD_FN_PURE ulong
1125 : fd_topo_gigantic_page_cnt( fd_topo_t const * topo,
1126 : ulong numa_idx );
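
/* Illustrative sketch: reporting the gigantic page requirement per NUMA
   node, e.g. before reserving hugetlbfs pages.  numa_cnt is assumed to
   be supplied by the caller (for example via fd_shmem_numa_cnt()).

     for( ulong numa=0UL; numa<numa_cnt; numa++ )
       FD_LOG_NOTICE(( "numa %lu needs %lu gigantic pages",
                       numa, fd_topo_gigantic_page_cnt( &topo, numa ) ));
*/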
1127 :
1128 : /* This returns the number of huge pages in the application needed by
1129 : the topology on the provided numa node. It includes pages needed by
1130 : things placed in the hugetlbfs (workspaces, process stacks). If
1131 : include_anonymous is true, it also includes anonymous hugepages which
1132 : are needed but are not placed in the hugetlbfs. */
1133 :
1134 : FD_FN_PURE ulong
1135 : fd_topo_huge_page_cnt( fd_topo_t const * topo,
1136 : ulong numa_idx,
1137 : int include_anonymous );
1138 :
1139 : /* Prints a message describing the topology to an output stream. If
1140 :    stdout is true, it will be written to stdout, otherwise it will be
1141 :    written as a NOTICE log message to the log file. */
1142 : void
1143 : fd_topo_print_log( int stdout,
1144 : fd_topo_t * topo );
1145 :
1146 : FD_PROTOTYPES_END
1147 :
1148 : #endif /* HEADER_fd_src_disco_topo_fd_topo_h */
|