Line data Source code
1 : /* The repair command spawns a smaller topology for profiling the repair
2 : tile. This is a standalone application, and it can be run in mainnet,
3 : testnet and/or a private cluster. */
4 :
5 : #include "../../../disco/net/fd_net_tile.h"
6 : #include "../../../disco/tiles.h"
7 : #include "../../../disco/topo/fd_topob.h"
8 : #include "../../../disco/topo/fd_cpu_topo.h"
9 : #include "../../../util/pod/fd_pod_format.h"
10 : #include "../../../util/tile/fd_tile_private.h"
11 :
12 : #include "../../firedancer/topology.h"
13 : #include "../../firedancer/topology.c"
14 : #include "../../shared/commands/configure/configure.h"
15 : #include "../../shared/commands/run/run.h" /* initialize_workspaces */
16 : #include "../../shared/fd_config.h" /* config_t */
17 : #include "../../shared_dev/commands/dev.h"
18 : #include "../../../disco/tiles.h"
19 : #include "../../../disco/topo/fd_topob.h"
20 : #include "../../../util/pod/fd_pod_format.h"
21 :
22 : #include <unistd.h> /* pause */
23 :
24 : fd_topo_run_tile_t fdctl_tile_run( fd_topo_tile_t const * tile );
25 :
26 : static ulong
27 0 : link_permit_no_producers( fd_topo_t * topo, char * link_name ) {
28 0 : ulong found = 0UL;
29 0 : for( ulong link_i = 0UL; link_i < topo->link_cnt; link_i++ ) {
30 0 : if( !strcmp( topo->links[ link_i ].name, link_name ) ) {
31 0 : topo->links[ link_i ].permit_no_producers = 1;
32 0 : found++;
33 0 : }
34 0 : }
35 0 : return found;
36 0 : }
37 :
38 : static ulong
39 0 : link_permit_no_consumers( fd_topo_t * topo, char * link_name ) {
40 0 : ulong found = 0UL;
41 0 : for( ulong link_i = 0UL; link_i < topo->link_cnt; link_i++ ) {
42 0 : if( !strcmp( topo->links[ link_i ].name, link_name ) ) {
43 0 : topo->links[ link_i ].permit_no_consumers = 1;
44 0 : found++;
45 0 : }
46 0 : }
47 0 : return found;
48 0 : }
49 :
50 : /* repair_topo is a subset of "src/app/firedancer/topology.c" at commit
51 : 0d8386f4f305bb15329813cfe4a40c3594249e96, slightly modified to work
52 : as a repair profiler. TODO ideally, one should invoke the firedancer
53 : topology first, and exclude the parts that are not needed, instead of
54 : manually generating new topologies for every command. This would
55 : also guarantee that the profiler is replicating (as close as possible)
56 : the full topology. */
/* repair_topo builds the reduced topology used by the repair profiler:
   net, shred, gossip, sign, metric, repair tiles plus a "scap"
   capture tile.  The construction order (workspaces, then links, then
   tiles, then objects/wiring) follows fd_topob conventions and must be
   preserved.  Writes the finished topology into config->topo. */
static void
repair_topo( config_t * config ) {
  /* Resolve the configured gossip entrypoints before the topology is
     built (the gossip tile consumes them from config). */
  resolve_gossip_entrypoints( config );

  ulong net_tile_cnt = config->layout.net_tile_count;
  ulong shred_tile_cnt = config->layout.shred_tile_count;
  ulong quic_tile_cnt = config->layout.quic_tile_count;

  fd_topo_t * topo = { fd_topob_new( &config->topo, config->name ) };
  topo->max_page_size = fd_cstr_to_shmem_page_sz( config->hugetlbfs.max_page_size );
  /* gigantic_page_threshold_mib is in MiB; shift converts to bytes. */
  topo->gigantic_page_threshold = config->hugetlbfs.gigantic_page_threshold_mib << 20;

  /* Workspaces.  topo, name */
  fd_topob_wksp( topo, "metric_in" );
  fd_topob_wksp( topo, "net_shred" );
  fd_topob_wksp( topo, "net_gossip" );
  fd_topob_wksp( topo, "net_repair" );
  fd_topob_wksp( topo, "net_quic" );

  fd_topob_wksp( topo, "shred_repair" );
  fd_topob_wksp( topo, "stake_out" );

  fd_topob_wksp( topo, "poh_shred" );

  fd_topob_wksp( topo, "shred_sign" );
  fd_topob_wksp( topo, "sign_shred" );

  fd_topob_wksp( topo, "gossip_sign" );
  fd_topob_wksp( topo, "sign_gossip" );

  fd_topob_wksp( topo, "crds_shred" );
  fd_topob_wksp( topo, "gossip_repai" );
  fd_topob_wksp( topo, "gossip_verif" );
  fd_topob_wksp( topo, "gossip_tower" );

  fd_topob_wksp( topo, "repair_sign" );
  fd_topob_wksp( topo, "sign_repair" );

  fd_topob_wksp( topo, "repair_repla" );
  fd_topob_wksp( topo, "gossip_send" );
  fd_topob_wksp( topo, "send_txns" );

  fd_topob_wksp( topo, "shred" );
  fd_topob_wksp( topo, "sign" );
  fd_topob_wksp( topo, "repair" );
  fd_topob_wksp( topo, "gossip" );
  fd_topob_wksp( topo, "metric" );
  fd_topob_wksp( topo, "fec_sets" );

  fd_topob_wksp( topo, "slot_fseqs" ); /* fseqs for marked slots eg. turbine slot */

#define FOR(cnt) for( ulong i=0UL; i<cnt; i++ )

  /* Depth of the shred_repair links: enough for the pending shred sets,
     rounded up to a power of two, capped at the dcache maximum. */
  ulong pending_fec_shreds_depth = fd_ulong_min( fd_ulong_pow2_up( config->tiles.shred.max_pending_shred_sets * FD_REEDSOL_DATA_SHREDS_MAX ), USHORT_MAX + 1 /* dcache max */ );

  /* Links.  topo, link_name, wksp_name, depth, mtu, burst */
  FOR(quic_tile_cnt) fd_topob_link( topo, "quic_net", "net_quic", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
  FOR(shred_tile_cnt) fd_topob_link( topo, "shred_net", "net_shred", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );

  /**/ fd_topob_link( topo, "stake_out", "stake_out", 128UL, 40UL + 40200UL * 40UL, 1UL );

  FOR(shred_tile_cnt) fd_topob_link( topo, "shred_sign", "shred_sign", 128UL, 32UL, 1UL );
  FOR(shred_tile_cnt) fd_topob_link( topo, "sign_shred", "sign_shred", 128UL, 64UL, 1UL );

  /**/ fd_topob_link( topo, "gossip_sign", "gossip_sign", 128UL, 2048UL, 1UL );
  /**/ fd_topob_link( topo, "sign_gossip", "sign_gossip", 128UL, 64UL, 1UL );

  /**/ fd_topob_link( topo, "gossip_verif", "gossip_verif", config->tiles.verify.receive_buffer_size, FD_TPU_MTU, 1UL );
  /**/ fd_topob_link( topo, "gossip_tower", "gossip_tower", 128UL, FD_TPU_MTU, 1UL );

  /**/ fd_topob_link( topo, "crds_shred", "crds_shred", 128UL, 8UL + 40200UL * 38UL, 1UL );
  /**/ fd_topob_link( topo, "gossip_repai", "gossip_repai", 128UL, 40200UL * 38UL, 1UL );
  /**/ fd_topob_link( topo, "gossip_send", "gossip_send", 128UL, 40200UL * 38UL, 1UL );

  /**/ fd_topob_link( topo, "gossip_net", "net_gossip", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );

  /**/ fd_topob_link( topo, "repair_net", "net_repair", config->net.ingress_buffer_size, FD_NET_MTU, 1UL );
  /**/ fd_topob_link( topo, "repair_sign", "repair_sign", 128UL, 2048UL, 1UL );
  FOR(shred_tile_cnt) fd_topob_link( topo, "shred_repair", "shred_repair", pending_fec_shreds_depth, FD_SHRED_REPAIR_MTU, 2UL /* at most 2 msgs per after_frag */ );

  FOR(shred_tile_cnt) fd_topob_link( topo, "repair_shred", "shred_repair", pending_fec_shreds_depth, sizeof(fd_ed25519_sig_t), 1UL );
  /**/ fd_topob_link( topo, "sign_repair", "sign_repair", 128UL, 64UL, 1UL );
  /**/ fd_topob_link( topo, "repair_repla", "repair_repla", 65536UL, sizeof(fd_fec_out_t), 1UL );
  /**/ fd_topob_link( topo, "poh_shred", "poh_shred", 16384UL, USHORT_MAX, 1UL );

  /**/ fd_topob_link( topo, "send_txns", "send_txns", 128UL, FD_TXN_MTU, 1UL );

  ushort parsed_tile_to_cpu[ FD_TILE_MAX ];
  /* Unassigned tiles will be floating, unless auto topology is enabled. */
  for( ulong i=0UL; i<FD_TILE_MAX; i++ ) parsed_tile_to_cpu[ i ] = USHORT_MAX;

  int is_auto_affinity = !strcmp( config->layout.affinity, "auto" );
  int is_bench_auto_affinity = !strcmp( config->development.bench.affinity, "auto" );

  /* Both affinity settings must agree on auto vs. explicit. */
  if( FD_UNLIKELY( is_auto_affinity != is_bench_auto_affinity ) ) {
    FD_LOG_ERR(( "The CPU affinity string in the configuration file under [layout.affinity] and [development.bench.affinity] must all be set to 'auto' or all be set to a specific CPU affinity string." ));
  }

  fd_topo_cpus_t cpus[1];
  fd_topo_cpus_init( cpus );

  ulong affinity_tile_cnt = 0UL;
  if( FD_LIKELY( !is_auto_affinity ) ) affinity_tile_cnt = fd_tile_private_cpus_parse( config->layout.affinity, parsed_tile_to_cpu );

  /* Translate the parsed ushort CPU assignments into ulong indices,
     validating each against the number of CPUs actually present. */
  ulong tile_to_cpu[ FD_TILE_MAX ] = {0};
  for( ulong i=0UL; i<affinity_tile_cnt; i++ ) {
    if( FD_UNLIKELY( parsed_tile_to_cpu[ i ]!=USHORT_MAX && parsed_tile_to_cpu[ i ]>=cpus->cpu_cnt ) )
      FD_LOG_ERR(( "The CPU affinity string in the configuration file under [layout.affinity] specifies a CPU index of %hu, but the system "
                   "only has %lu CPUs. You should either change the CPU allocations in the affinity string, or increase the number of CPUs "
                   "in the system.",
                   parsed_tile_to_cpu[ i ], cpus->cpu_cnt ));
    tile_to_cpu[ i ] = fd_ulong_if( parsed_tile_to_cpu[ i ]==USHORT_MAX, ULONG_MAX, (ulong)parsed_tile_to_cpu[ i ] );
  }

  fd_topos_net_tiles( topo, config->layout.net_tile_count, &config->net, config->tiles.netlink.max_routes, config->tiles.netlink.max_peer_routes, config->tiles.netlink.max_neighbors, tile_to_cpu );

  FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_gossip", i, config->net.ingress_buffer_size );
  FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_repair", i, config->net.ingress_buffer_size );
  FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_quic", i, config->net.ingress_buffer_size );
  FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_shred", i, config->net.ingress_buffer_size );

  /* Tiles.  topo, tile_name, tile_wksp, metrics_wksp, cpu_idx, is_agave, uses_keyswitch */
  FOR(shred_tile_cnt) fd_topob_tile( topo, "shred", "shred", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 1 );
  /**/ fd_topob_tile( topo, "sign", "sign", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 1 );
  /**/ fd_topob_tile( topo, "metric", "metric", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
  /**/ fd_topob_tile( topo, "gossip", "gossip", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
  fd_topo_tile_t * repair_tile = fd_topob_tile( topo, "repair", "repair", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );

  /* Setup a shared wksp object for fec sets. */

  ulong shred_depth = 65536UL; /* from fdctl/topology.c shred_store link. MAKE SURE TO KEEP IN SYNC. */
  ulong fec_set_cnt = shred_depth + config->tiles.shred.max_pending_shred_sets + 4UL;
  ulong fec_sets_sz = fec_set_cnt*sizeof(fd_shred34_t)*4; /* mirrors # of dcache entires in frankendancer */
  fd_topo_obj_t * fec_sets_obj = setup_topo_fec_sets( topo, "fec_sets", shred_tile_cnt*fec_sets_sz );
  for( ulong i=0UL; i<shred_tile_cnt; i++ ) {
    fd_topo_tile_t * shred_tile = &topo->tiles[ fd_topo_find_tile( topo, "shred", i ) ];
    fd_topob_tile_uses( topo, shred_tile, fec_sets_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
  }
  /* Repair only reads FEC sets; shred tiles write them. */
  fd_topob_tile_uses( topo, repair_tile, fec_sets_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
  FD_TEST( fd_pod_insertf_ulong( topo->props, fec_sets_obj->id, "fec_sets" ) );

  /* There's another special fseq that's used to communicate the shred
     version from the Agave boot path to the shred tile. */
  fd_topo_obj_t * poh_shred_obj = fd_topob_obj( topo, "fseq", "poh_shred" );
  /* NOTE(review): in this reduced topo the gossip tile stands in as the
     writer of poh_shred (there is no poh tile) -- confirm intended. */
  fd_topo_tile_t * poh_tile = &topo->tiles[ fd_topo_find_tile( topo, "gossip", 0UL ) ];
  fd_topob_tile_uses( topo, poh_tile, poh_shred_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );

  /* root_slot is an fseq marking the validator's current Tower root. */

  fd_topo_obj_t * root_slot_obj = fd_topob_obj( topo, "fseq", "slot_fseqs" );
  FD_TEST( fd_pod_insertf_ulong( topo->props, root_slot_obj->id, "root_slot" ) );

  /* turbine_slot0 is an fseq marking the slot number of the first shred
     we observed from Turbine.  This is a useful heuristic for
     determining when replay has progressed past the slot in which we
     last voted.  The idea is once replay has proceeded past the slot
     from which validator stopped replaying and therefore also stopped
     voting (crashed, shutdown, etc.), it will have "read-back" its
     latest tower in the ledger.  Note this logic is not true in the
     case our latest tower vote was for a minority fork. */

  fd_topo_obj_t * turbine_slot0_obj = fd_topob_obj( topo, "fseq", "slot_fseqs" );
  fd_topob_tile_uses( topo, repair_tile, turbine_slot0_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
  FD_TEST( fd_pod_insertf_ulong( topo->props, turbine_slot0_obj->id, "turbine_slot0" ) );

  /* turbine_slot is an fseq marking the highest slot we've observed on
     a shred.  This is continuously updated as the validator is running
     and is used to determine whether the validator is caught up with
     the rest of the cluster. */

  fd_topo_obj_t * turbine_slot_obj = fd_topob_obj( topo, "fseq", "slot_fseqs" );
  fd_topob_tile_uses( topo, repair_tile, turbine_slot_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
  FD_TEST( fd_pod_insertf_ulong( topo->props, turbine_slot_obj->id, "turbine_slot" ) );

  for( ulong i=0UL; i<shred_tile_cnt; i++ ) {
    fd_topo_tile_t * shred_tile = &topo->tiles[ fd_topo_find_tile( topo, "shred", i ) ];
    fd_topob_tile_uses( topo, shred_tile, poh_shred_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
  }
  FD_TEST( fd_pod_insertf_ulong( topo->props, poh_shred_obj->id, "poh_shred" ) );

  /* With explicit affinity, the core count must cover every tile; too
     few is fatal, too many is only a warning. */
  if( FD_LIKELY( !is_auto_affinity ) ) {
    if( FD_UNLIKELY( affinity_tile_cnt<topo->tile_cnt ) )
      FD_LOG_ERR(( "The topology you are using has %lu tiles, but the CPU affinity specified in the config tile as [layout.affinity] only provides for %lu cores. "
                   "You should either increase the number of cores dedicated to Firedancer in the affinity string, or decrease the number of cores needed by reducing "
                   "the total tile count. You can reduce the tile count by decreasing individual tile counts in the [layout] section of the configuration file.",
                   topo->tile_cnt, affinity_tile_cnt ));
    if( FD_UNLIKELY( affinity_tile_cnt>topo->tile_cnt ) )
      FD_LOG_WARNING(( "The topology you are using has %lu tiles, but the CPU affinity specified in the config tile as [layout.affinity] provides for %lu cores. "
                       "Not all cores in the affinity will be used by Firedancer. You may wish to increase the number of tiles in the system by increasing "
                       "individual tile counts in the [layout] section of the configuration file.",
                       topo->tile_cnt, affinity_tile_cnt ));
  }

  /* Wiring.  topo, tile_name, tile_kind_id, fseq_wksp, link_name, link_kind_id, reliable, polled */
  for( ulong j=0UL; j<shred_tile_cnt; j++ )
    fd_topos_tile_in_net( topo, "metric_in", "shred_net", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
  for( ulong j=0UL; j<quic_tile_cnt; j++ )
    fd_topos_tile_in_net( topo, "metric_in", "quic_net", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */

  /**/ fd_topob_tile_in( topo, "gossip", 0UL, "metric_in", "send_txns", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

  /**/ fd_topos_tile_in_net( topo, "metric_in", "gossip_net", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
  /**/ fd_topos_tile_in_net( topo, "metric_in", "repair_net", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */

  FOR(shred_tile_cnt) for( ulong j=0UL; j<net_tile_cnt; j++ )
    fd_topob_tile_in( topo, "shred", i, "metric_in", "net_shred", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
  FOR(shred_tile_cnt) fd_topob_tile_in( topo, "shred", i, "metric_in", "poh_shred", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
  FOR(shred_tile_cnt) fd_topob_tile_in( topo, "shred", i, "metric_in", "stake_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
  FOR(shred_tile_cnt) fd_topob_tile_in( topo, "shred", i, "metric_in", "crds_shred", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
  FOR(shred_tile_cnt) fd_topob_tile_out( topo, "shred", i, "shred_repair", i );
  FOR(shred_tile_cnt) fd_topob_tile_out( topo, "shred", i, "shred_net", i );

  FOR(shred_tile_cnt) fd_topob_tile_in( topo, "shred", i, "metric_in", "repair_shred", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

  /**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_net", 0UL );

  /* Sign links don't need to be reliable because they are synchronous,
     so there's at most one fragment in flight at a time anyway.  The
     sign links are also not polled by the mux, instead the tiles will
     read the sign responses out of band in a dedicated spin loop. */
  for( ulong i=0UL; i<shred_tile_cnt; i++ ) {
    /**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "shred_sign", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
    /**/ fd_topob_tile_out( topo, "shred", i, "shred_sign", i );
    /**/ fd_topob_tile_in( topo, "shred", i, "metric_in", "sign_shred", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
    /**/ fd_topob_tile_out( topo, "sign", 0UL, "sign_shred", i );
  }

  FOR(net_tile_cnt) fd_topob_tile_in( topo, "gossip", 0UL, "metric_in", "net_gossip", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "gossip_net", 0UL );
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "crds_shred", 0UL );
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "gossip_repai", 0UL );
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "gossip_verif", 0UL );
  /**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "gossip_sign", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "gossip_sign", 0UL );
  /**/ fd_topob_tile_in( topo, "gossip", 0UL, "metric_in", "sign_gossip", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
  /**/ fd_topob_tile_out( topo, "sign", 0UL, "sign_gossip", 0UL );
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "gossip_send", 0UL );
  /**/ fd_topob_tile_out( topo, "gossip", 0UL, "gossip_tower", 0UL );

  FOR(net_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "net_repair", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
  /**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "gossip_repai", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
  /**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "stake_out", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
  FOR(shred_tile_cnt) fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "shred_repair", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

  /**/ fd_topob_tile_in( topo, "sign", 0UL, "metric_in", "repair_sign", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
  /**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_sign", 0UL );
  /**/ fd_topob_tile_in( topo, "repair", 0UL, "metric_in", "sign_repair", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
  /**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_repla", 0UL );
  FOR(shred_tile_cnt) fd_topob_tile_out( topo, "repair", 0UL, "repair_shred", i );
  /**/ fd_topob_tile_out( topo, "sign", 0UL, "sign_repair", 0UL );

  /* scap tile: unconditionally enabled in this profiler topology.  It
     taps several links read-only and is the producer of stake_out. */
  if( 1 ) {
    fd_topob_wksp( topo, "scap" );

    fd_topob_wksp( topo, "repair_scap" );
    fd_topob_wksp( topo, "replay_scap" );

    fd_topo_tile_t * scap_tile = fd_topob_tile( topo, "scap", "scap", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );

    fd_topob_link( topo, "repair_scap", "repair_scap", 128UL, FD_SLICE_MAX_WITH_HEADERS, 1UL );
    fd_topob_link( topo, "replay_scap", "replay_scap", 128UL, sizeof(fd_hash_t)+sizeof(ulong), 1UL );

    fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "repair_net", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
    for( ulong j=0UL; j<net_tile_cnt; j++ ) {
      fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "net_shred", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
    }
    for( ulong j=0UL; j<shred_tile_cnt; j++ ) {
      fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "shred_repair", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
    }
    fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "crds_shred", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
    fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "gossip_repai", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );

    fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "repair_scap", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
    fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "replay_scap", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );

    fd_topob_tile_uses( topo, scap_tile, root_slot_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
    fd_topob_tile_out( topo, "scap", 0UL, "stake_out", 0UL );
  }

  /* This reduced topology intentionally leaves some links without a
     producer or consumer (the full validator would provide them); mark
     them as permitted and verify the expected number of matches. */
  FD_TEST( link_permit_no_producers( topo, "quic_net" ) == quic_tile_cnt );
  FD_TEST( link_permit_no_producers( topo, "poh_shred" ) == 1UL );
  FD_TEST( link_permit_no_producers( topo, "send_txns" ) == 1UL );
  FD_TEST( link_permit_no_producers( topo, "repair_scap" ) == 1UL );
  FD_TEST( link_permit_no_producers( topo, "replay_scap" ) == 1UL );

  FD_TEST( link_permit_no_consumers( topo, "net_quic" ) == quic_tile_cnt );
  FD_TEST( link_permit_no_consumers( topo, "gossip_verif" ) == 1UL );
  FD_TEST( link_permit_no_consumers( topo, "gossip_tower" ) == 1UL );
  FD_TEST( link_permit_no_consumers( topo, "gossip_send" ) == 1UL );
  FD_TEST( link_permit_no_consumers( topo, "repair_repla" ) == 1UL );

  FOR(net_tile_cnt) fd_topos_net_tile_finish( topo, i );

  /* Apply per-tile configuration from config; unknown tile names are
     fatal. */
  for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
    fd_topo_tile_t * tile = &topo->tiles[ i ];
    if( !fd_topo_configure_tile( tile, config ) ) {
      FD_LOG_ERR(( "unknown tile name %lu `%s`", i, tile->name ));
    }
  }

  if( FD_UNLIKELY( is_auto_affinity ) ) fd_topob_auto_layout( topo, 0 );

  fd_topob_finish( topo, CALLBACKS );

  /* NOTE(review): topo aliases &config->topo (fd_topob_new was given
     that address), so this copy appears to be a self-assignment --
     confirm it is intentional. */
  config->topo = *topo;
}
363 :
364 : extern int * fd_log_private_shared_lock;
365 :
366 : void
367 : repair_cmd_args( int * pargc,
368 : char *** pargv,
369 0 : args_t * args ) {
370 :
371 0 : if( FD_UNLIKELY( !*pargc ) ) FD_LOG_ERR(( "usage: repair --manifest-path <manifest_path>" ));
372 :
373 0 : char const * manifest_path = fd_env_strip_cmdline_cstr( pargc, pargv, "--manifest-path", NULL, "unknonw" );
374 0 : fd_cstr_fini( fd_cstr_append_cstr_safe( fd_cstr_init( args->repair.manifest_path ), manifest_path, sizeof(args->repair.manifest_path)-1UL ) );
375 :
376 0 : FD_LOG_NOTICE(( "repair manifest_path %s", args->repair.manifest_path ));
377 0 : }
378 :
379 : static void
380 : repair_cmd_fn( args_t * args FD_PARAM_UNUSED,
381 0 : config_t * config ) {
382 :
383 0 : FD_LOG_NOTICE(( "Repair profiler topo" ));
384 :
385 0 : memset( &config->topo, 0, sizeof(config->topo) );
386 0 : repair_topo( config );
387 :
388 0 : for( ulong i=0UL; i<config->topo.tile_cnt; i++ ) {
389 0 : fd_topo_tile_t * tile = &config->topo.tiles[ i ];
390 0 : if( FD_UNLIKELY( !strcmp( tile->name, "scap" ) ) ) {
391 : /* This is not part of the config, and it must be set manually
392 : on purpose as a safety mechanism. */
393 0 : tile->shredcap.enable_publish_stake_weights = 1;
394 0 : strncpy( tile->shredcap.manifest_path, args->repair.manifest_path, PATH_MAX );
395 0 : }
396 0 : }
397 :
398 0 : FD_LOG_NOTICE(( "Repair profiler init" ));
399 :
400 0 : args_t configure_args = {
401 0 : .configure.command = CONFIGURE_CMD_INIT,
402 0 : };
403 0 : for( ulong i=0UL; STAGES[ i ]; i++ ) {
404 0 : configure_args.configure.stages[ i ] = STAGES[ i ];
405 0 : }
406 0 : configure_cmd_fn( &configure_args, config );
407 0 : if( 0==strcmp( config->net.provider, "xdp" ) ) {
408 0 : fd_xdp_fds_t fds = fd_topo_install_xdp( &config->topo, config->net.bind_address_parsed );
409 0 : (void)fds;
410 0 : }
411 :
412 0 : run_firedancer_init( config, 1 );
413 :
414 0 : fd_log_private_shared_lock[ 1 ] = 0;
415 0 : fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_WRITE );
416 :
417 0 : FD_LOG_NOTICE(( "Repair profiler run" ));
418 :
419 0 : fd_topo_run_single_process( &config->topo, 0, config->uid, config->gid, fdctl_tile_run );
420 0 : for(;;) pause();
421 0 : }
422 :
/* Action descriptor registering the `repair` subcommand: its name,
   argument parser, entry point, and permission-check hook (shared with
   the dev command). */
action_t fd_action_repair = {
  .name = "repair",
  .args = repair_cmd_args,
  .fn = repair_cmd_fn,
  .perm = dev_cmd_perm,
};
|