Line data Source code
1 : #ifndef HEADER_fd_src_disco_topo_fd_topo_h
2 : #define HEADER_fd_src_disco_topo_fd_topo_h
3 :
4 : #include "../stem/fd_stem.h"
5 : #include "../../tango/fd_tango.h"
6 : #include "../../waltz/xdp/fd_xdp1.h"
7 : #include "../../ballet/base58/fd_base58.h"
8 : #include "../../util/net/fd_net_headers.h"
9 :
10 : /* Maximum number of workspaces that may be present in a topology. */
11 : #define FD_TOPO_MAX_WKSPS (256UL)
12 : /* Maximum number of links that may be present in a topology. */
13 : #define FD_TOPO_MAX_LINKS (256UL)
14 : /* Maximum number of tiles that may be present in a topology. */
15 0 : #define FD_TOPO_MAX_TILES (256UL)
16 : /* Maximum number of objects that may be present in a topology. */
17 : #define FD_TOPO_MAX_OBJS (4096UL)
18 : /* Maximum number of links that may go into any one tile in the
19 : topology. */
20 : #define FD_TOPO_MAX_TILE_IN_LINKS ( 128UL)
21 : /* Maximum number of links that a tile may write to. */
22 : #define FD_TOPO_MAX_TILE_OUT_LINKS ( 32UL)
23 : /* Maximum number of objects that a tile can use. */
24 : #define FD_TOPO_MAX_TILE_OBJS ( 256UL)
25 :
26 : /* Maximum number of additional source IP addresses. */
27 : #define FD_NET_MAX_SRC_ADDR 4
28 :
29 : /* Maximum number of additional destinations for leader shreds and for retransmitted shreds. */
30 : #define FD_TOPO_ADTL_DESTS_MAX ( 32UL)
31 :
32 :
33 : /* A workspace is a Firedancer-specific memory management structure that
34 : sits on top of one or more memory-mapped gigantic or huge pages in a
35 : hugetlbfs mount. */
36 : typedef struct {
37 : ulong id; /* The ID of this workspace. Indexed from [0, wksp_cnt). When placed in a topology, the ID must be the index of the workspace in the workspaces list. */
38 : char name[ 13UL ]; /* The name of this workspace, like "pack". There can be at most one of each workspace name in a topology. */
39 :
40 : ulong numa_idx; /* The index of the NUMA node on the system that this workspace should be allocated from. */
41 :
42 : ulong min_part_max; /* Artificially raise part_max */
43 : ulong min_loose_sz; /* Artificially raise loose footprint */
44 :
45 : /* Computed fields. These are not supplied as configuration but calculated as needed. */
46 : struct {
47 : ulong page_sz; /* The size of the pages that this workspace is backed by. One of FD_PAGE_SIZE_*. */
48 : ulong page_cnt; /* The number of pages that must be mapped to this workspace to store all the data needed by consumers. */
49 : ulong part_max; /* The maximum number of partitions in the underlying workspace. There can only be this many allocations made at any one time. */
50 :
51 : fd_wksp_t * wksp; /* The workspace memory in the local process. */
52 : ulong known_footprint; /* Total size in bytes of all data in Firedancer that will be stored in this workspace at startup. */
53 : ulong total_footprint; /* Total size in bytes of all data in Firedancer that could be stored in this workspace, including known data and loose data. */
54 : };
55 : } fd_topo_wksp_t;
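
/* Example (illustrative sketch, not part of the API): given a
   previously constructed fd_topo_t * topo, the computed fields above
   give the amount of memory backing a workspace.  fd_topo_find_wksp is
   declared later in this header.

     ulong wksp_id = fd_topo_find_wksp( topo, "pack" );
     if( wksp_id!=ULONG_MAX ) {
       fd_topo_wksp_t const * wksp = &topo->workspaces[ wksp_id ];
       ulong mapped_sz = wksp->page_sz * wksp->page_cnt; // bytes of backing pages
     } */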
56 :
57 : /* A link is an mcache in a workspace that has one producer and one or
58 : more consumers. A link may optionally also have a dcache, that holds
59 : fragments referred to by the mcache entries.
60 :
61 : A link belongs to exactly one workspace. A link has exactly one
62 : producer, and one or more consumers. Each consumer is either reliable
63 : or not reliable. A link has a depth and an MTU, which correspond to
64 : the depth and MTU of the mcache and dcache respectively. An MTU of
65 : zero means no dcache is needed, as there is no data. */
66 : typedef struct {
67 : ulong id; /* The ID of this link. Indexed from [0, link_cnt). When placed in a topology, the ID must be the index of the link in the links list. */
68 : char name[ 13UL ]; /* The name of this link, like "pack_bank". There can be multiple of each link name in a topology. */
69 : ulong kind_id; /* The ID of this link within its name. If there are N links of a particular name, they have IDs [0, N). The pair (name, kind_id) uniquely identifies a link, as does "id" on its own. */
70 :
71 : ulong depth; /* The depth of the mcache representing the link. */
72 : ulong mtu; /* The MTU of data fragments in the mcache. A value of 0 means there is no dcache. */
73 : ulong burst; /* The maximum number of MTU-sized data fragments that might be burst into the dcache at once. */
74 :
75 : ulong mcache_obj_id;
76 : ulong dcache_obj_id;
77 :
78 : /* Computed fields. These are not supplied as configuration but calculated as needed. */
79 : struct {
80 : fd_frag_meta_t * mcache; /* The mcache of this link. */
81 : void * dcache; /* The dcache of this link, if it has one. */
82 : };
83 :
84 : uint permit_no_consumers : 1; /* Permit a topology where this link has no consumers */
85 : uint permit_no_producers : 1; /* Permit a topology where this link has no producers */
86 : } fd_topo_link_t;
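
/* Example (illustrative sketch): given a previously constructed
   fd_topo_t * topo, iterating its links and reporting which ones carry
   data.  An MTU of zero indicates a metadata-only link with no dcache.

     for( ulong i=0UL; i<topo->link_cnt; i++ ) {
       fd_topo_link_t const * link = &topo->links[ i ];
       int has_dcache = link->mtu!=0UL;
       FD_LOG_NOTICE(( "link %s:%lu depth=%lu mtu=%lu dcache=%d",
                       link->name, link->kind_id, link->depth, link->mtu, has_dcache ));
     } */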
87 :
88 : /* Be careful: ip and port are in different byte orders. */
89 : typedef struct {
90 : uint ip; /* in network byte order */
91 : ushort port; /* in host byte order */
92 : } fd_topo_ip_port_t;
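
/* Example (illustrative sketch): populating an fd_topo_ip_port_t on the
   little-endian hosts Firedancer targets.  The ip field holds the
   dotted-quad a.b.c.d with "a" in the lowest addressed byte (network
   byte order in memory), while port is stored as a plain host-order
   value.  The address below is from the 198.51.100.0/24 documentation
   range.

     fd_topo_ip_port_t dest;
     dest.ip   = 198U | (51U<<8) | (100U<<16) | (7U<<24); // 198.51.100.7 in network byte order
     dest.port = (ushort)8001;                            // host byte order, no swap */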
93 :
94 : struct fd_topo_net_tile {
95 : ulong umem_dcache_obj_id; /* dcache for XDP UMEM frames */
96 : uint bind_address;
97 :
98 : ushort shred_listen_port;
99 : ushort quic_transaction_listen_port;
100 : ushort legacy_transaction_listen_port;
101 : ushort gossip_listen_port;
102 : ushort repair_intake_listen_port;
103 : ushort repair_serve_listen_port;
104 : ushort send_src_port;
105 : };
106 : typedef struct fd_topo_net_tile fd_topo_net_tile_t;
107 :
108 : /* A tile is a unique process that is spawned by Firedancer to represent
109 : one thread of execution. Firedancer sandboxes all tiles to their own
110 : process for security reasons.
111 :
112 : A tile belongs to exactly one workspace. A tile is a consumer of 0
113 : or more links, its inputs. A tile is a producer of 0 or more output
114 : links.
115 :
116 : All input links will be automatically polled by the tile
117 : infrastructure, and output links will automatically source and manage
118 : credits from consumers. */
119 : struct fd_topo_tile {
120 : ulong id; /* The ID of this tile. Indexed from [0, tile_cnt). When placed in a topology, the ID must be the index of the tile in the tiles list. */
121 : char name[ 7UL ]; /* The name of this tile. There can be multiple of each tile name in a topology. */
122 : char metrics_name[ 10UL ]; /* The name of this tile for looking up metrics. This is used so tiles can share a name but report different metrics, for Frankendancer and Firedancer. */
123 : ulong kind_id; /* The ID of this tile within its name. If there are N tiles of a particular name, they have IDs [0, N). The pair (name, kind_id) uniquely identifies a tile, as does "id" on its own. */
124 : int is_agave; /* If the tile needs to run in the Agave (Anza) address space or not. */
125 : int allow_shutdown; /* If the tile is allowed to shut down gracefully. If false, when the tile exits it will tear down the entire application. */
126 :
127 : ulong cpu_idx; /* The CPU index to pin the tile on. A value of ULONG_MAX or more indicates the tile should be floating and not pinned to a core. */
128 :
129 : ulong in_cnt; /* The number of links that this tile reads from. */
130 : ulong in_link_id[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* The link_id of each link that this tile reads from, indexed in [0, in_cnt). */
131 : int in_link_reliable[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* Whether this tile is a reliable or an unreliable consumer of each link it reads from, indexed in [0, in_cnt). */
132 : int in_link_poll[ FD_TOPO_MAX_TILE_IN_LINKS ]; /* If each link that this tile reads from should be polled by the tile infrastructure, indexed in [0, in_cnt).
133 : If the link is not polled, the tile will not receive frags for it and the tile writer is responsible for
134 : reading from the link. The link must be marked as unreliable as it is not flow controlled. */
135 :
136 : ulong out_cnt; /* The number of links that this tile writes to. */
137 : ulong out_link_id[ FD_TOPO_MAX_TILE_OUT_LINKS ]; /* The link_id of each link that this tile writes to, indexed in [0, out_cnt). */
138 :
139 : ulong tile_obj_id;
140 : ulong metrics_obj_id;
141 : ulong keyswitch_obj_id;
142 : ulong in_link_fseq_obj_id[ FD_TOPO_MAX_TILE_IN_LINKS ];
143 :
144 : ulong uses_obj_cnt;
145 : ulong uses_obj_id[ FD_TOPO_MAX_TILE_OBJS ];
146 : int uses_obj_mode[ FD_TOPO_MAX_TILE_OBJS ];
147 :
148 : /* Computed fields. These are not supplied as configuration but calculated as needed. */
149 : struct {
150 : ulong * metrics; /* The shared memory for metrics that this tile should write. Consumed by monitoring and metrics-writing tiles. */
151 :
152 : /* The fseq of each link that this tile reads from. Multiple fseqs
153 : may point to the link, if there are multiple consumers. An fseq
154 : can be uniquely identified via (link_id, tile_id), or (link_kind,
155 : link_kind_id, tile_kind, tile_kind_id) */
156 : ulong * in_link_fseq[ FD_TOPO_MAX_TILE_IN_LINKS ];
157 : };
158 :
159 : /* Configuration fields. These are required to be known by the topology so it can determine the
160 : total size of Firedancer in memory. */
161 : union {
162 : fd_topo_net_tile_t net;
163 :
164 : struct {
165 : fd_topo_net_tile_t net;
166 :
167 : char if_virt[ 16 ]; /* device name (virtual, for routing) */
168 : char if_phys[ 16 ]; /* device name (physical, for RX/TX) */
169 : uint if_queue; /* device queue index */
170 :
171 : /* xdp specific options */
172 : ulong xdp_rx_queue_size;
173 : ulong xdp_tx_queue_size;
174 : ulong free_ring_depth;
175 : long tx_flush_timeout_ns;
176 : char xdp_mode[8];
177 : int zero_copy;
178 :
179 : ulong netdev_dbl_buf_obj_id; /* dbl_buf containing netdev_tbl */
180 : ulong fib4_main_obj_id; /* fib4 containing main route table */
181 : ulong fib4_local_obj_id; /* fib4 containing local route table */
182 : ulong neigh4_obj_id; /* neigh4 hash map header */
183 : ulong neigh4_ele_obj_id; /* neigh4 hash map slots */
184 : } xdp;
185 :
186 : struct {
187 : fd_topo_net_tile_t net;
188 : /* sock specific options */
189 : int so_sndbuf;
190 : int so_rcvbuf;
191 : } sock;
192 :
193 : struct {
194 : ulong netdev_dbl_buf_obj_id; /* dbl_buf containing netdev_tbl */
195 : ulong fib4_main_obj_id; /* fib4 containing main route table */
196 : ulong fib4_local_obj_id; /* fib4 containing local route table */
197 : char neigh_if[ 16 ]; /* neigh4 interface name */
198 : ulong neigh4_obj_id; /* neigh4 hash map header */
199 : ulong neigh4_ele_obj_id; /* neigh4 hash map slots */
200 : } netlink;
201 :
202 0 : #define FD_TOPO_GOSSIP_ENTRYPOINTS_MAX 16UL
203 :
204 : struct {
205 : char identity_key_path[ PATH_MAX ];
206 :
207 : ulong entrypoints_cnt;
208 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
209 :
210 : long boot_timestamp_nanos;
211 :
212 : ulong tcache_depth;
213 :
214 : ushort shred_version;
215 : int allow_private_address;
216 : } gossvf;
217 :
218 : struct {
219 : char identity_key_path[ PATH_MAX ];
220 :
221 : ulong entrypoints_cnt;
222 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
223 :
224 : long boot_timestamp_nanos;
225 :
226 : uint ip_addr;
227 : ushort shred_version;
228 :
229 : ulong max_entries;
230 : ulong max_purged;
231 : ulong max_failed;
232 :
233 : struct {
234 : ushort gossip;
235 : ushort tvu;
236 : ushort tvu_quic;
237 : ushort tpu;
238 : ushort tpu_quic;
239 : ushort repair;
240 : } ports;
241 : } gossip;
242 :
243 : struct {
244 : uint out_depth;
245 : uint reasm_cnt;
246 : ulong max_concurrent_connections;
247 : ulong max_concurrent_handshakes;
248 : ushort quic_transaction_listen_port;
249 : long idle_timeout_millis;
250 : uint ack_delay_millis;
251 : int retry;
252 : char key_log_path[ PATH_MAX ];
253 : } quic;
254 :
255 : struct {
256 : ulong tcache_depth;
257 : } verify;
258 :
259 : struct {
260 : ulong tcache_depth;
261 : } dedup;
262 :
263 : struct {
264 : char url[ 256 ];
265 : ulong url_len;
266 : char sni[ 256 ];
267 : ulong sni_len;
268 : char identity_key_path[ PATH_MAX ];
269 : char key_log_path[ PATH_MAX ];
270 : ulong buf_sz;
271 : ulong ssl_heap_sz;
272 : ulong keepalive_interval_nanos;
273 : uchar tls_cert_verify : 1;
274 : } bundle;
275 :
276 : struct {
277 : ulong max_pending_transactions;
278 : ulong bank_tile_count;
279 : int larger_max_cost_per_block;
280 : int larger_shred_limits_per_block;
281 : int use_consumed_cus;
282 : int schedule_strategy;
283 : struct {
284 : int enabled;
285 : uchar tip_distribution_program_addr[ 32 ];
286 : uchar tip_payment_program_addr[ 32 ];
287 : uchar tip_distribution_authority[ 32 ];
288 : ulong commission_bps;
289 : char identity_key_path[ PATH_MAX ];
290 : char vote_account_path[ PATH_MAX ]; /* or pubkey is okay */
291 : } bundle;
292 : } pack;
293 :
294 : struct {
295 : int lagged_consecutive_leader_start;
296 : int plugins_enabled;
297 : ulong bank_cnt;
298 : char identity_key_path[ PATH_MAX ];
299 : struct {
300 : int enabled;
301 : uchar tip_payment_program_addr[ 32 ];
302 : uchar tip_distribution_program_addr[ 32 ];
303 : char vote_account_path[ PATH_MAX ];
304 : } bundle;
305 : } poh;
306 :
307 : struct {
308 : ulong depth;
309 : ulong fec_resolver_depth;
310 : char identity_key_path[ PATH_MAX ];
311 : ushort shred_listen_port;
312 : int larger_shred_limits_per_block;
313 : ushort expected_shred_version;
314 : ulong adtl_dests_retransmit_cnt;
315 : fd_topo_ip_port_t adtl_dests_retransmit[ FD_TOPO_ADTL_DESTS_MAX ];
316 : ulong adtl_dests_leader_cnt;
317 : fd_topo_ip_port_t adtl_dests_leader[ FD_TOPO_ADTL_DESTS_MAX ];
318 : } shred;
319 :
320 : struct {
321 : ulong disable_blockstore_from_slot;
322 : } store;
323 :
324 : struct {
325 : char identity_key_path[ PATH_MAX ];
326 : } sign;
327 :
328 : struct {
329 : uint listen_addr;
330 : ushort listen_port;
331 :
332 : int is_voting;
333 :
334 : char cluster[ 32 ];
335 : char identity_key_path[ PATH_MAX ];
336 : char vote_key_path[ PATH_MAX ];
337 :
338 : ulong max_http_connections;
339 : ulong max_websocket_connections;
340 : ulong max_http_request_length;
341 : ulong send_buffer_size_mb;
342 : int schedule_strategy;
343 :
344 : int websocket_compression;
345 : int frontend_release_channel;
346 : } gui;
347 :
348 : struct {
349 : uint listen_addr;
350 : ushort listen_port;
351 :
352 : ulong max_http_connections;
353 : ulong send_buffer_size_mb;
354 : ulong max_http_request_length;
355 :
356 : ulong max_live_slots;
357 :
358 : char identity_key_path[ PATH_MAX ];
359 : } rpc;
360 :
361 : struct {
362 : uint prometheus_listen_addr;
363 : ushort prometheus_listen_port;
364 : } metric;
365 :
366 : struct {
367 : ulong fec_max;
368 : ulong max_vote_accounts;
369 :
370 : ulong funk_obj_id;
371 : ulong txncache_obj_id;
372 : ulong progcache_obj_id;
373 :
374 : char shred_cap[ PATH_MAX ];
375 :
376 : char identity_key_path[ PATH_MAX ];
377 : uint ip_addr;
378 : char vote_account_path[ PATH_MAX ];
379 :
380 : ushort expected_shred_version;
381 :
382 : ulong heap_size_gib;
383 : ulong max_live_slots;
384 :
385 : /* not specified in TOML */
386 :
387 : ulong enable_features_cnt;
388 : char enable_features[ 16 ][ FD_BASE58_ENCODED_32_SZ ];
389 :
390 : int larger_max_cost_per_block;
391 :
392 : ulong capture_start_slot;
393 : char solcap_capture[ PATH_MAX ];
394 : char dump_proto_dir[ PATH_MAX ];
395 : int dump_block_to_pb;
396 :
397 : struct {
398 : int enabled;
399 : uchar tip_payment_program_addr[ 32 ];
400 : uchar tip_distribution_program_addr[ 32 ];
401 : char vote_account_path[ PATH_MAX ];
402 : } bundle;
403 :
404 : } replay;
405 :
406 : struct {
407 : ulong funk_obj_id;
408 : ulong txncache_obj_id;
409 : ulong progcache_obj_id;
410 :
411 : ulong max_live_slots;
412 :
413 : ulong capture_start_slot;
414 : char solcap_capture[ PATH_MAX ];
415 : char dump_proto_dir[ PATH_MAX ];
416 : int dump_instr_to_pb;
417 : int dump_txn_to_pb;
418 : int dump_syscall_to_pb;
419 : int dump_elf_to_pb;
420 : } exec;
421 :
422 : struct {
423 : ushort send_to_port;
424 : uint send_to_ip_addr;
425 : ulong conn_cnt;
426 : int no_quic;
427 : } benchs;
428 :
429 : struct {
430 : ushort rpc_port;
431 : uint rpc_ip_addr;
432 : } bencho;
433 :
434 : struct {
435 : ulong accounts_cnt;
436 : int mode;
437 : float contending_fraction;
438 : float cu_price_spread;
439 : } benchg;
440 :
441 : struct {
442 : ushort repair_intake_listen_port;
443 : ushort repair_serve_listen_port;
444 : char identity_key_path[ PATH_MAX ];
445 : ulong max_pending_shred_sets;
446 : ulong slot_max;
447 :
448 : /* non-config */
449 :
450 : ulong repair_sign_depth;
451 : ulong repair_sign_cnt;
452 :
453 : ulong end_slot; /* repair profiler mode only */
454 : } repair;
455 :
456 : struct {
457 : char slots_pending[PATH_MAX];
458 :
459 : ulong expected_shred_version;
460 :
461 : /* non-config */
462 :
463 : char identity_key_path[ PATH_MAX ];
464 : char shred_cap_archive[ PATH_MAX ];
465 : char shred_cap_replay[ PATH_MAX ];
466 : ulong shred_cap_end_slot;
467 :
468 : char blockstore_file[ PATH_MAX ];
469 : char blockstore_restore[ PATH_MAX ];
470 : } store_int;
471 :
472 : struct {
473 : ushort send_src_port;
474 :
475 : /* non-config */
476 :
477 : uint ip_addr;
478 : char identity_key_path[ PATH_MAX ];
479 : } send;
480 :
481 : struct {
482 : uint fake_dst_ip;
483 : } pktgen;
484 :
485 : struct {
486 : ulong end_slot;
487 : char rocksdb_path[ PATH_MAX ];
488 : char shredcap_path[ PATH_MAX ];
489 : char bank_hash_path[ PATH_MAX ];
490 : char ingest_mode[ 32 ];
491 :
492 : /* Set internally by the archiver tile */
493 : int archive_fd;
494 : } archiver;
495 :
496 : struct {
497 : int hard_fork_fatal;
498 : ulong max_live_slots;
499 : ulong max_vote_lookahead;
500 : char identity_key[ PATH_MAX ];
501 : char vote_account[ PATH_MAX ];
502 : char base_path[PATH_MAX];
503 : } tower;
504 :
505 : struct {
506 : char folder_path[ PATH_MAX ];
507 : ushort repair_intake_listen_port;
508 : ulong write_buffer_size; /* Size of the write buffer for the capture tile */
509 : int enable_publish_stake_weights;
510 : char manifest_path[ PATH_MAX ];
511 :
512 : /* Set internally by the capture tile */
513 : int shreds_fd;
514 : int requests_fd;
515 : int fecs_fd;
516 : int peers_fd;
517 : int bank_hashes_fd;
518 : int slices_fd;
519 : } shredcap;
520 :
521 : #define FD_TOPO_SNAPSHOTS_GOSSIP_LIST_MAX (32UL)
522 0 : #define FD_TOPO_SNAPSHOTS_SERVERS_MAX (16UL)
523 :
524 : struct fd_topo_tile_snapct {
525 : char snapshots_path[ PATH_MAX ];
526 :
527 : struct {
528 : uint max_local_full_effective_age;
529 : uint max_local_incremental_age;
530 :
531 : struct {
532 : int allow_any;
533 : ulong allow_list_cnt;
534 : fd_pubkey_t allow_list[ FD_TOPO_SNAPSHOTS_GOSSIP_LIST_MAX ];
535 : ulong block_list_cnt;
536 : fd_pubkey_t block_list[ FD_TOPO_SNAPSHOTS_GOSSIP_LIST_MAX ];
537 : } gossip;
538 :
539 : ulong servers_cnt;
540 : fd_ip4_port_t servers[ FD_TOPO_SNAPSHOTS_SERVERS_MAX ];
541 : } sources;
542 :
543 : int incremental_snapshots;
544 : uint max_full_snapshots_to_keep;
545 : uint max_incremental_snapshots_to_keep;
546 : uint full_effective_age_cancel_threshold;
547 : } snapct;
548 :
549 : struct {
550 : char snapshots_path[ PATH_MAX ];
551 : } snapld;
552 :
553 : struct {
554 : ulong max_live_slots;
555 : ulong funk_obj_id;
556 : ulong txncache_obj_id;
557 :
558 : uint lthash_disabled : 1;
559 : uint use_vinyl : 1;
560 : ulong vinyl_meta_map_obj_id;
561 : ulong vinyl_meta_pool_obj_id;
562 : ulong snapwr_depth;
563 : char vinyl_path[ PATH_MAX ];
564 : } snapin;
565 :
566 : struct {
567 : ulong dcache_obj_id;
568 : char vinyl_path[ PATH_MAX ];
569 : } snapwr;
570 :
571 : struct {
572 :
573 : uint bind_address;
574 : ushort bind_port;
575 :
576 : ushort expected_shred_version;
577 : ulong entrypoints_cnt;
578 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
579 : } ipecho;
580 :
581 : struct {
582 : ulong max_live_slots;
583 : ulong txncache_obj_id;
584 : ulong funk_obj_id;
585 : ulong progcache_obj_id;
586 : } bank;
587 :
588 : struct {
589 : ulong funk_obj_id;
590 : } resolv;
591 :
592 : struct {
593 : ulong funk_obj_id;
594 :
595 : int allow_download;
596 :
597 : ushort expected_shred_version;
598 : ulong entrypoints_cnt;
599 : fd_ip4_port_t entrypoints[ FD_TOPO_GOSSIP_ENTRYPOINTS_MAX ];
600 :
601 : int has_expected_genesis_hash;
602 : uchar expected_genesis_hash[ 32UL ];
603 :
604 : char genesis_path[ PATH_MAX ];
605 :
606 : uint target_gid;
607 : uint target_uid;
608 : } genesi;
609 :
610 : struct {
611 : ulong vinyl_meta_map_obj_id;
612 : ulong vinyl_meta_pool_obj_id;
613 : ulong vinyl_line_max;
614 : ulong vinyl_cnc_obj_id; /* optional */
615 : ulong vinyl_data_obj_id;
616 : char vinyl_bstream_path[ PATH_MAX ];
617 :
618 : int io_type; /* FD_VINYL_IO_TYPE_* */
619 : uint uring_depth;
620 : } vinyl;
621 : };
622 : };
623 :
624 : typedef struct fd_topo_tile fd_topo_tile_t;
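
/* Example (illustrative sketch): given a previously constructed
   fd_topo_t * topo, looking up a tile by (name, kind_id) and walking
   its input links.  fd_topo_find_tile is declared later in this header.

     ulong tile_id = fd_topo_find_tile( topo, "shred", 0UL );
     if( tile_id!=ULONG_MAX ) {
       fd_topo_tile_t const * tile = &topo->tiles[ tile_id ];
       for( ulong i=0UL; i<tile->in_cnt; i++ ) {
         fd_topo_link_t const * in = &topo->links[ tile->in_link_id[ i ] ];
         FD_LOG_NOTICE(( "in link %s:%lu reliable=%d polled=%d",
                         in->name, in->kind_id, tile->in_link_reliable[ i ], tile->in_link_poll[ i ] ));
       }
     } */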
625 :
626 : typedef struct {
627 : ulong id;
628 : char name[ 13UL ];
629 : ulong wksp_id;
630 :
631 : ulong offset;
632 : ulong footprint;
633 : } fd_topo_obj_t;
634 :
635 : /* An fd_topo_t represents the overall structure of a Firedancer
636 : configuration, describing all the workspaces, tiles, and links
637 : between them. */
638 : struct fd_topo {
639 : char app_name[ 256UL ];
640 : uchar props[ 16384UL ];
641 :
642 : ulong wksp_cnt;
643 : ulong link_cnt;
644 : ulong tile_cnt;
645 : ulong obj_cnt;
646 :
647 : fd_topo_wksp_t workspaces[ FD_TOPO_MAX_WKSPS ];
648 : fd_topo_link_t links[ FD_TOPO_MAX_LINKS ];
649 : fd_topo_tile_t tiles[ FD_TOPO_MAX_TILES ];
650 : fd_topo_obj_t objs[ FD_TOPO_MAX_OBJS ];
651 :
652 : ulong agave_affinity_cnt;
653 : ulong agave_affinity_cpu_idx[ FD_TILE_MAX ];
654 :
655 : ulong max_page_size; /* 2^21 or 2^30 */
656 : ulong gigantic_page_threshold; /* see [hugetlbfs.gigantic_page_threshold_mib]*/
657 : };
658 : typedef struct fd_topo fd_topo_t;
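
/* Example (illustrative sketch): the counts in an fd_topo_t are bounded
   by the FD_TOPO_MAX_* limits defined above, so a minimal consistency
   check over a constructed topology might look like:

     FD_TEST( topo->wksp_cnt<=FD_TOPO_MAX_WKSPS );
     FD_TEST( topo->link_cnt<=FD_TOPO_MAX_LINKS );
     FD_TEST( topo->tile_cnt<=FD_TOPO_MAX_TILES );
     FD_TEST( topo->obj_cnt <=FD_TOPO_MAX_OBJS  ); */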
659 :
660 : typedef struct {
661 : char const * name;
662 :
663 : int keep_host_networking;
664 : int allow_connect;
665 : int allow_renameat;
666 : ulong rlimit_file_cnt;
667 : ulong rlimit_address_space;
668 : ulong rlimit_data;
669 : int for_tpool;
670 :
671 : ulong (*populate_allowed_seccomp)( fd_topo_t const * topo, fd_topo_tile_t const * tile, ulong out_cnt, struct sock_filter * out );
672 : ulong (*populate_allowed_fds )( fd_topo_t const * topo, fd_topo_tile_t const * tile, ulong out_fds_sz, int * out_fds );
673 : ulong (*scratch_align )( void );
674 : ulong (*scratch_footprint )( fd_topo_tile_t const * tile );
675 : ulong (*loose_footprint )( fd_topo_tile_t const * tile );
676 : void (*privileged_init )( fd_topo_t * topo, fd_topo_tile_t * tile );
677 : void (*unprivileged_init )( fd_topo_t * topo, fd_topo_tile_t * tile );
678 : void (*run )( fd_topo_t * topo, fd_topo_tile_t * tile );
679 : ulong (*rlimit_file_cnt_fn )( fd_topo_t const * topo, fd_topo_tile_t const * tile );
680 : } fd_topo_run_tile_t;
681 :
682 : struct fd_topo_obj_callbacks {
683 : char const * name;
684 : ulong (* footprint )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
685 : ulong (* align )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
686 : ulong (* loose )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
687 : void (* new )( fd_topo_t const * topo, fd_topo_obj_t const * obj );
688 : };
689 :
690 : typedef struct fd_topo_obj_callbacks fd_topo_obj_callbacks_t;
691 :
692 : FD_PROTOTYPES_BEGIN
693 :
694 : FD_FN_CONST static inline ulong
695 918 : fd_topo_workspace_align( void ) {
696 : /* This needs to be the max( align ) of all the child members that
697 : could be aligned into this workspace, otherwise our footprint
698 : calculation will not be correct. For now just set to 4096 but this
699 : should probably be calculated dynamically, or we should reduce
700 : those child aligns if we can. */
701 918 : return 4096UL;
702 918 : }
703 :
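/* fd_topo_obj_laddr returns a pointer in the local address space to the
   object with the given obj_id in the topology.  The workspace that
   contains the object must already have been joined (mapped into this
   process). */
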
704 : void *
705 : fd_topo_obj_laddr( fd_topo_t const * topo,
706 : ulong obj_id );
707 :
708 : /* Returns a pointer in the local address space to the base address of
709 : the workspace out of which the given object was allocated. */
710 :
711 : static inline void *
712 : fd_topo_obj_wksp_base( fd_topo_t const * topo,
713 0 : ulong obj_id ) {
714 0 : FD_TEST( obj_id<FD_TOPO_MAX_OBJS );
715 0 : fd_topo_obj_t const * obj = &topo->objs[ obj_id ];
716 0 : FD_TEST( obj->id == obj_id );
717 0 : ulong const wksp_id = obj->wksp_id;
718 :
719 0 : FD_TEST( wksp_id<FD_TOPO_MAX_WKSPS );
720 0 : fd_topo_wksp_t const * wksp = &topo->workspaces[ wksp_id ];
721 0 : FD_TEST( wksp->id == wksp_id );
722 0 : return wksp->wksp;
723 0 : }
724 :
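/* fd_topo_tile_name_cnt returns the number of tiles in the topology
   with the given name. */
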
725 : FD_FN_PURE static inline ulong
726 : fd_topo_tile_name_cnt( fd_topo_t const * topo,
727 6 : char const * name ) {
728 6 : ulong cnt = 0;
729 129 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
730 123 : if( FD_UNLIKELY( !strcmp( topo->tiles[ i ].name, name ) ) ) cnt++;
731 123 : }
732 6 : return cnt;
733 6 : }
734 :
735 : /* Finds the workspace of a given name in the topology. Returns
736 : ULONG_MAX if there is no such workspace. There can be at most one
737 : workspace of a given name. */
738 :
739 : FD_FN_PURE static inline ulong
740 : fd_topo_find_wksp( fd_topo_t const * topo,
741 1269 : char const * name ) {
742 37716 : for( ulong i=0; i<topo->wksp_cnt; i++ ) {
743 37716 : if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, name ) ) ) return i;
744 37716 : }
745 0 : return ULONG_MAX;
746 1269 : }
747 :
748 : /* Finds the tile of a given name and kind_id in the topology. There will
749 : be at most one such tile, since kind_id is unique within a name.
750 : Returns ULONG_MAX if there is no such tile. */
751 :
752 : FD_FN_PURE static inline ulong
753 : fd_topo_find_tile( fd_topo_t const * topo,
754 : char const * name,
755 912 : ulong kind_id ) {
756 19011 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
757 19005 : if( FD_UNLIKELY( !strcmp( topo->tiles[ i ].name, name ) ) && topo->tiles[ i ].kind_id == kind_id ) return i;
758 19005 : }
759 6 : return ULONG_MAX;
760 912 : }
761 :
762 : /* Finds the link of a given name and kind_id in the topology. There will
763 : be at most one such link, since kind_id is unique within a name.
764 : Returns ULONG_MAX if there is no such link. */
765 :
766 : FD_FN_PURE static inline ulong
767 : fd_topo_find_link( fd_topo_t const * topo,
768 : char const * name,
769 726 : ulong kind_id ) {
770 24681 : for( ulong i=0; i<topo->link_cnt; i++ ) {
771 24681 : if( FD_UNLIKELY( !strcmp( topo->links[ i ].name, name ) ) && topo->links[ i ].kind_id == kind_id ) return i;
772 24681 : }
773 0 : return ULONG_MAX;
774 726 : }
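
/* Example (illustrative sketch): the find helpers above return an index
   into the corresponding array, or ULONG_MAX when there is no match, so
   lookups on a constructed fd_topo_t * topo are typically written as:

     ulong link_id = fd_topo_find_link( topo, "pack_bank", 0UL );
     if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link pack_bank:0 not found" ));
     fd_topo_link_t const * link = &topo->links[ link_id ]; */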
775 :
776 : FD_FN_PURE static inline ulong
777 : fd_topo_find_tile_in_link( fd_topo_t const * topo,
778 : fd_topo_tile_t const * tile,
779 : char const * name,
780 0 : ulong kind_id ) {
781 0 : for( ulong i=0; i<tile->in_cnt; i++ ) {
782 0 : if( FD_UNLIKELY( !strcmp( topo->links[ tile->in_link_id[ i ] ].name, name ) )
783 0 : && topo->links[ tile->in_link_id[ i ] ].kind_id == kind_id ) return i;
784 0 : }
785 0 : return ULONG_MAX;
786 0 : }
787 :
788 : FD_FN_PURE static inline ulong
789 : fd_topo_find_tile_out_link( fd_topo_t const * topo,
790 : fd_topo_tile_t const * tile,
791 : char const * name,
792 0 : ulong kind_id ) {
793 0 : for( ulong i=0; i<tile->out_cnt; i++ ) {
794 0 : if( FD_UNLIKELY( !strcmp( topo->links[ tile->out_link_id[ i ] ].name, name ) )
795 0 : && topo->links[ tile->out_link_id[ i ] ].kind_id == kind_id ) return i;
796 0 : }
797 0 : return ULONG_MAX;
798 0 : }
799 :
800 : /* Find the id of the tile which is a producer for the given link. If
801 : no tile is a producer for the link, returns ULONG_MAX. This should
802 : not be possible for a well-formed and validated topology. */
803 : FD_FN_PURE static inline ulong
804 : fd_topo_find_link_producer( fd_topo_t const * topo,
805 0 : fd_topo_link_t const * link ) {
806 0 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
807 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
808 :
809 0 : for( ulong j=0; j<tile->out_cnt; j++ ) {
810 0 : if( FD_UNLIKELY( tile->out_link_id[ j ] == link->id ) ) return i;
811 0 : }
812 0 : }
813 0 : return ULONG_MAX;
814 0 : }
815 :
816 : /* Given a link, count the number of consumers of that link among all
817 : the tiles in the topology. */
818 : FD_FN_PURE static inline ulong
819 : fd_topo_link_consumer_cnt( fd_topo_t const * topo,
820 225 : fd_topo_link_t const * link ) {
821 225 : ulong cnt = 0;
822 9225 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
823 9000 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
824 44775 : for( ulong j=0; j<tile->in_cnt; j++ ) {
825 35775 : if( FD_UNLIKELY( tile->in_link_id[ j ] == link->id ) ) cnt++;
826 35775 : }
827 9000 : }
828 :
829 225 : return cnt;
830 225 : }
831 :
832 : /* Given a link, count the number of reliable consumers of that link
833 : among all the tiles in the topology. */
834 : FD_FN_PURE static inline ulong
835 : fd_topo_link_reliable_consumer_cnt( fd_topo_t const * topo,
836 0 : fd_topo_link_t const * link ) {
837 0 : ulong cnt = 0;
838 0 : for( ulong i=0; i<topo->tile_cnt; i++ ) {
839 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
840 0 : for( ulong j=0; j<tile->in_cnt; j++ ) {
841 0 : if( FD_UNLIKELY( tile->in_link_id[ j ] == link->id && tile->in_link_reliable[ j ] ) ) cnt++;
842 0 : }
843 0 : }
844 :
845 0 : return cnt;
846 0 : }
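
/* Example (illustrative sketch): combining the helpers above to check
   that every link in a constructed topology is wired up, honoring the
   per-link permit_no_producers / permit_no_consumers overrides.

     for( ulong i=0UL; i<topo->link_cnt; i++ ) {
       fd_topo_link_t const * link = &topo->links[ i ];
       if( FD_UNLIKELY( !link->permit_no_producers && fd_topo_find_link_producer( topo, link )==ULONG_MAX ) )
         FD_LOG_ERR(( "link %s:%lu has no producer", link->name, link->kind_id ));
       if( FD_UNLIKELY( !link->permit_no_consumers && !fd_topo_link_consumer_cnt( topo, link ) ) )
         FD_LOG_ERR(( "link %s:%lu has no consumers", link->name, link->kind_id ));
     } */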
847 :
848 : FD_FN_PURE static inline ulong
849 : fd_topo_tile_consumer_cnt( fd_topo_t const * topo,
850 0 : fd_topo_tile_t const * tile ) {
851 0 : (void)topo;
852 0 : return tile->out_cnt;
853 0 : }
854 :
855 : FD_FN_PURE static inline ulong
856 : fd_topo_tile_reliable_consumer_cnt( fd_topo_t const * topo,
857 0 : fd_topo_tile_t const * tile ) {
858 0 : ulong reliable_cons_cnt = 0UL;
859 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
860 0 : fd_topo_tile_t const * consumer_tile = &topo->tiles[ i ];
861 0 : for( ulong j=0UL; j<consumer_tile->in_cnt; j++ ) {
862 0 : for( ulong k=0UL; k<tile->out_cnt; k++ ) {
863 0 : if( FD_UNLIKELY( consumer_tile->in_link_id[ j ]==tile->out_link_id[ k ] && consumer_tile->in_link_reliable[ j ] ) ) {
864 0 : reliable_cons_cnt++;
865 0 : }
866 0 : }
867 0 : }
868 0 : }
869 0 : return reliable_cons_cnt;
870 0 : }
871 :
872 : FD_FN_PURE static inline ulong
873 : fd_topo_tile_producer_cnt( fd_topo_t const * topo,
874 0 : fd_topo_tile_t const * tile ) {
875 0 : (void)topo;
876 0 : ulong in_cnt = 0UL;
877 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
878 0 : if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
879 0 : in_cnt++;
880 0 : }
881 0 : return in_cnt;
882 0 : }
883 :
884 : /* Join (map into the process) all shared memory (huge/gigantic pages)
885 : needed by the tile, in the given topology. All memory associated
886 : with the tile (i.e. used by links that the tile either produces to or
887 : consumes from, or used by the tile itself for its cnc) will be
888 : attached (mapped into the process).
889 :
890 : This is needed to play nicely with the sandbox. Once a process is
891 : sandboxed we can no longer map any memory. */
892 : void
893 : fd_topo_join_tile_workspaces( fd_topo_t * topo,
894 : fd_topo_tile_t * tile );
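
/* Example (illustrative sketch, assuming a launcher that prepares a
   tile before sandboxing): workspaces must be mapped before the sandbox
   is entered, after which the tile's IPC objects can be resolved with
   fd_topo_fill_tile (declared below).

     fd_topo_join_tile_workspaces( topo, tile ); // map all memory the tile touches
     fd_topo_fill_tile( topo, tile );            // resolve mcaches, dcaches, fseqs, metrics
     // ... enter the sandbox, then run the tile ... */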
895 :
896 : /* Join (map into the process) the shared memory (huge/gigantic pages)
897 : for the given workspace. Mode is one of
898 : FD_SHMEM_JOIN_MODE_READ_WRITE or FD_SHMEM_JOIN_MODE_READ_ONLY and
899 : determines the prot argument that will be passed to mmap when mapping
900 : the pages in (PROT_WRITE or PROT_READ respectively). */
901 : void
902 : fd_topo_join_workspace( fd_topo_t * topo,
903 : fd_topo_wksp_t * wksp,
904 : int mode );
905 :
906 : /* Join (map into the process) all shared memory (huge/gigantic pages)
907 : needed by all tiles in the topology. Mode is one of
908 : FD_SHMEM_JOIN_MODE_READ_WRITE or FD_SHMEM_JOIN_MODE_READ_ONLY and
909 : determines the prot argument that will be passed to mmap when
910 : mapping the pages in (PROT_WRITE or PROT_READ respectively). */
911 : void
912 : fd_topo_join_workspaces( fd_topo_t * topo,
913 : int mode );
914 :
915 : /* Leave (unmap from the process) the shared memory needed for the
916 : given workspace in the topology, if it was previously mapped.
917 :
918 : topo and wksp are assumed non-NULL. It is OK if the workspace
919 : has not been previously joined, in which case this is a no-op. */
920 :
921 : void
922 : fd_topo_leave_workspace( fd_topo_t * topo,
923 : fd_topo_wksp_t * wksp );
924 :
925 : /* Leave (unmap from the process) all shared memory needed by all
926 : tiles in the topology, if each of them was mapped.
927 :
928 : topo is assumed non-NULL. Only workspaces which were previously
929 : joined are unmapped. */
930 :
931 : void
932 : fd_topo_leave_workspaces( fd_topo_t * topo );
933 :
934 : /* Create the given workspace needed by the topology on the system.
935 : This does not "join" the workspace (map its memory into the
936 : process), but only creates the .wksp file and formats it correctly
937 : as a workspace.
938 :
939 : Returns 0 on success and -1 on failure, with errno set to the error.
940 : The only reason for failure currently that will be returned is
941 : ENOMEM, as other unexpected errors will cause the program to exit.
942 :
943 : If update_existing is 1, the workspace will not be created from
944 : scratch but it will be assumed that it already exists from a prior
945 : run and may need to be resized and then have its header
946 : structures reinitialized. This can save a very expensive operation
947 : of zeroing all of the workspace pages. This is dangerous in
948 : production because it can leave stray memory from prior runs around,
949 : and should only be used in development environments. */
950 :
951 : int
952 : fd_topo_create_workspace( fd_topo_t * topo,
953 : fd_topo_wksp_t * wksp,
954 : int update_existing );
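
/* Example (illustrative sketch): creating every workspace in a
   topology, treating ENOMEM (see <errno.h>) as the only recoverable
   failure as documented above.

     for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
       if( FD_UNLIKELY( -1==fd_topo_create_workspace( topo, &topo->workspaces[ i ], 0 ) ) ) {
         FD_TEST( errno==ENOMEM );
         FD_LOG_ERR(( "out of memory creating workspace %s", topo->workspaces[ i ].name ));
       }
     } */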
955 :
956 : /* Join the standard IPC objects needed by this particular tile in the
957 : topology. */
958 :
959 : void
960 : fd_topo_fill_tile( fd_topo_t * topo,
961 : fd_topo_tile_t * tile );
962 :
963 : /* Same as fd_topo_fill_tile but fills in all the objects for a
964 : particular workspace. */
965 : void
966 : fd_topo_workspace_fill( fd_topo_t * topo,
967 : fd_topo_wksp_t * wksp );
968 :
969 : /* Apply the new() callback to every object that is resident in the given
970 : workspace in the topology. */
971 :
972 : void
973 : fd_topo_wksp_new( fd_topo_t const * topo,
974 : fd_topo_wksp_t const * wksp,
975 : fd_topo_obj_callbacks_t ** callbacks );
976 :
977 : /* Same as fd_topo_fill_tile but fills in all tiles in the topology. */
978 :
979 : void
980 : fd_topo_fill( fd_topo_t * topo );
981 :
982 : /* fd_topo_tile_stack_join joins a huge page optimized stack for the
983 : provided tile. The stack is assumed to already exist at a known
984 : path in the hugetlbfs mount. */
985 :
986 : void *
987 : fd_topo_tile_stack_join( char const * app_name,
988 : char const * tile_name,
989 : ulong tile_kind_id );
990 :
991 : /* fd_topo_run_single_process runs all the tiles in a single process
992 : (the calling process). This spawns a thread for each tile, switches
993 : that thread to the given UID and GID and then runs the tile in it.
994 : Each thread will never exit, as tiles are expected to run forever.
995 : An error is logged and the application will exit if a tile exits.
996 : The function itself does return after spawning all the threads.
997 :
998 : The threads will not be sandboxed in any way, except switching to the
999 : provided UID and GID, so they will share the same address space, and
1000 : not have any seccomp restrictions or use any Linux namespaces. The
1001 : calling thread will also switch to the provided UID and GID before
1002 : it returns.
1003 :
1004 : In production, when running with an Agave child process this is
1005 : used for spawning certain tiles inside the Agave address space.
1006 : It's also useful for tooling and debugging, but is not how the main
1007 : production Firedancer process runs. For production, each tile is run
1008 : in its own address space with a separate process and full security
1009 : sandbox.
1010 :
1011 : The agave argument determines which tiles are started. If the
1012 : argument is 0 or 1, only non-agave (or only agave) tiles are started.
1013 : If the argument is any other value, all tiles in the topology are
1014 : started regardless of whether they are Agave tiles or not. */
1015 :
1016 : void
1017 : fd_topo_run_single_process( fd_topo_t * topo,
1018 : int agave,
1019 : uint uid,
1020 : uint gid,
1021 : fd_topo_run_tile_t (* tile_run )( fd_topo_tile_t const * tile ) );
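
/* Example (illustrative sketch): running every tile in the calling
   process for local development.  my_tile_run is a hypothetical
   callback mapping a tile to its fd_topo_run_tile_t; getuid, getgid and
   pause come from <unistd.h>.

     fd_topo_run_single_process( topo, 2, getuid(), getgid(), my_tile_run ); // 2: start all tiles
     for(;;) pause(); // tiles run on their own threads; keep the main thread alive */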
1022 :
1023 : /* fd_topo_run_tile runs the given tile directly within the current
1024 : process (and thread). The function will never return, as tiles are
1025 : expected to run forever. An error is logged and the application will
1026 : exit if the tile exits.
1027 :
1028 : The sandbox argument determines if the current process will be
1029 : sandboxed fully before starting the tile. The thread will switch to
1030 : the UID and GID provided before starting the tile, even if the thread
1031 : is not being sandboxed. Although POSIX specifies that all threads in
1032 : a process must share a UID and GID, this is not the case on Linux.
1033 : The thread will switch to the provided UID and GID without switching
1034 : the other threads in the process.
1035 :
1036 : If keep_controlling_terminal is set to 0, and the sandbox is enabled
1037 : the controlling terminal will be detached as an additional sandbox
1038 : measure, but you will not be able to send Ctrl+C or other signals
1039 : from the terminal. See fd_sandbox.h for more information.
1040 :
1041 : The allow_fd argument is only used if sandbox is true, and is a file
1042 : descriptor which will be allowed to exist in the process. Normally
1043 : the sandbox code rejects and aborts if there is an unexpected file
1044 : descriptor present on boot. This is helpful to allow a parent
1045 : process to be notified on termination of the tile by waiting for a
1046 : pipe file descriptor to get closed.
1047 :
1048 : wait and debugger are both used in debugging. If wait is non-NULL,
1049 : the runner will wait until the value pointed to by wait is non-zero
1050 : before launching the tile. Likewise, if debugger is non-NULL, the
1051 : runner will wait until a debugger is attached before setting the
1052 : value pointed to by debugger to non-zero. These are intended to be
1053 : used as a pair, where many tiles share a waiting reference, and then
1054 : one of the tiles (a tile you want to attach the debugger to) has the
1055 : same reference provided as the debugger, so all tiles will stop and
1056 : wait for the debugger to attach to it before proceeding. */
1057 :
1058 : void
1059 : fd_topo_run_tile( fd_topo_t * topo,
1060 : fd_topo_tile_t * tile,
1061 : int sandbox,
1062 : int keep_controlling_terminal,
1063 : int dumpable,
1064 : uint uid,
1065 : uint gid,
1066 : int allow_fd,
1067 : volatile int * wait,
1068 : volatile int * debugger,
1069 : fd_topo_run_tile_t * tile_run );
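
/* Example (illustrative sketch): the wait/debugger pairing described
   above.  Each call below is made from a different tile process (the
   function never returns), shared_flag is assumed to point into memory
   mapped by all of those processes, and uid, gid and the tile/run
   handles are placeholders.

     // In the process of the tile being debugged: block until a debugger
     // attaches, then set *debugger (shared_flag) to release the others.
     fd_topo_run_tile( topo, dbg_tile,   0, 1, 1, uid, gid, -1, NULL,        shared_flag, &dbg_run   );

     // In every other tile process: spin until shared_flag is set before
     // launching the tile.
     fd_topo_run_tile( topo, other_tile, 0, 1, 1, uid, gid, -1, shared_flag, NULL,        &other_run ); */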
1070 :
1071 : /* This is for determining the value of RLIMIT_MLOCK that we need to
1072 : successfully run all tiles in separate processes. The value returned
1073 : is the maximum amount of memory that will be locked with mlock() by
1074 : any individual process in the tree. Specifically, if we have three
1075 : tile processes, and they each need to lock 5, 9, and 2 MiB of memory
1076 : respectively, RLIMIT_MLOCK needs to be 9 MiB to allow all three
1077 : process mlock() calls to succeed.
1078 :
1079 : Tiles lock memory in three ways. For any workspace they are using, they
1080 : lock the entire workspace. Then each tile uses huge pages for the
1081 : stack, which are also locked, and finally some tiles use private
1082 : locked mmaps outside the workspace for storing key material. The
1083 : results here include all of this memory together.
1084 :
1085 : The result is not necessarily the amount of memory used by the tile
1086 : process, although it will be quite close. Tiles could potentially
1087 : allocate memory (e.g. with brk) without needing to lock it, which
1088 : would not need to be included, and some kernel memory that tiles cause
1089 : to be allocated (for example XSK buffers) is also not included. The
1090 : actual amount of memory used will not be less than this value. */
1091 : FD_FN_PURE ulong
1092 : fd_topo_mlock_max_tile( fd_topo_t const * topo );
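
/* Example (illustrative sketch): using the value above to raise
   RLIMIT_MLOCK before spawning tile processes (setrlimit and struct
   rlimit come from <sys/resource.h>, strerror from <string.h>).

     ulong mlock_max = fd_topo_mlock_max_tile( topo );
     struct rlimit lim = { .rlim_cur = mlock_max, .rlim_max = mlock_max };
     if( FD_UNLIKELY( setrlimit( RLIMIT_MLOCK, &lim ) ) )
       FD_LOG_ERR(( "setrlimit(RLIMIT_MLOCK) failed (%i-%s)", errno, strerror( errno ) )); */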
1093 :
1094 : /* Same as fd_topo_mlock_max_tile, but for loading the entire topology
1095 : into one process, rather than a separate process per tile. This is
1096 : used, for example, by the configuration code when it creates all the
1097 : workspaces, or the monitor that maps the entire system into one
1098 : address space. */
1099 : FD_FN_PURE ulong
1100 : fd_topo_mlock( fd_topo_t const * topo );
1101 :
1102 : /* This returns the number of gigantic pages needed by the topology on
1103 : the provided numa node. It includes pages needed by the workspaces,
1104 : as well as additional allocations like huge pages for process stacks
1105 : and private key storage. */
1106 :
1107 : FD_FN_PURE ulong
1108 : fd_topo_gigantic_page_cnt( fd_topo_t const * topo,
1109 : ulong numa_idx );
1110 :
1111 : /* This returns the number of huge pages in the application needed by
1112 : the topology on the provided numa node. It includes pages needed by
1113 : things placed in the hugetlbfs (workspaces, process stacks). If
1114 : include_anonymous is true, it also includes anonymous hugepages which
1115 : are needed but are not placed in the hugetlbfs. */
1116 :
1117 : FD_FN_PURE ulong
1118 : fd_topo_huge_page_cnt( fd_topo_t const * topo,
1119 : ulong numa_idx,
1120 : int include_anonymous );
1121 :
1122 : /* Prints a message describing the topology to an output stream. If
1123 : stdout is true, it will be written to stdout, otherwise it will be written
1124 : as a NOTICE log message to the log file. */
1125 : void
1126 : fd_topo_print_log( int stdout,
1127 : fd_topo_t * topo );
1128 :
1129 : FD_PROTOTYPES_END
1130 :
1131 : #endif /* HEADER_fd_src_disco_topo_fd_topo_h */
|