LCOV - code coverage report
Current view: top level - app/firedancer-dev/commands - repair.c (source / functions)
                                              Hit      Total    Coverage
Test: cov.lcov               Lines:             0        692       0.0 %
Date: 2025-12-07 04:58:33    Functions:         0         18       0.0 %

          Line data    Source code
       1             : /* The repair command spawns a smaller topology for profiling the repair
       2             :    tile.  This is a standalone application, and it can be run against
       3             :    mainnet, testnet, or a private cluster. */
       4             : 
       5             : #include "../../../disco/net/fd_net_tile.h"
       6             : #include "../../../disco/tiles.h"
       7             : #include "../../../disco/topo/fd_topob.h"
       8             : #include "../../../disco/topo/fd_cpu_topo.h"
       9             : #include "../../../util/pod/fd_pod_format.h"
      10             : #include "../../../util/tile/fd_tile_private.h"
      11             : 
      12             : #include "../../firedancer/topology.h"
      13             : #include "../../firedancer/topology.c"
      14             : #include "../../shared/commands/configure/configure.h"
      15             : #include "../../shared/commands/run/run.h" /* initialize_workspaces */
      16             : #include "../../shared/fd_config.h" /* config_t */
      17             : #include "../../shared_dev/commands/dev.h"
      18             : #include "../../../disco/tiles.h"
      19             : #include "../../../disco/topo/fd_topob.h"
      20             : #include "../../../util/pod/fd_pod_format.h"
      21             : #include "../../../waltz/resolv/fd_io_readline.h"
      22             : #include "../../platform/fd_sys_util.h"
      23             : #include "../../shared/commands/monitor/helper.h"
      24             : #include "../../../disco/metrics/fd_metrics.h"
      25             : #include "../../../discof/repair/fd_repair_tile.c"
      26             : 
      27             : #include "gossip.h"
      28             : #include "core_subtopo.h"
      29             : 
      30             : #include <unistd.h> /* pause */
      31             : #include <fcntl.h>
      32             : #include <stdio.h>
      33             : #include <termios.h>
      34             : #include <errno.h>
      35             : 
      36             : struct fd_location_info {
      37             :   ulong ip4_addr;         /* for map key convenience */
      38             :   char location[ 128 ];
      39             : };
      40             : typedef struct fd_location_info fd_location_info_t;
      41             : 
      42             : #define MAP_NAME    fd_location_table
      43           0 : #define MAP_T       fd_location_info_t
      44           0 : #define MAP_KEY     ip4_addr
      45           0 : #define MAP_LG_SLOT_CNT 16
      46             : #define MAP_MEMOIZE 0
      47             : #include "../../../util/tmpl/fd_map.c"
      48             : 
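                      : /* fd_map.c generates fd_location_table_{new,join,insert,query} for this
                      :    struct, keyed by ip4_addr with 2^16 slots (MAP_LG_SLOT_CNT).  The static
                      :    buffer below backs that table; see read_iptable() and
                      :    print_peer_location_latency() for usage. */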
      49             : uchar __attribute__((aligned(alignof(fd_location_info_t)))) location_table_mem[ sizeof(fd_location_info_t) * (1 << 16 ) ];
      50             : 
      51             : static struct termios termios_backup;
      52             : 
      53             : static void
      54           0 : restore_terminal( void ) {
      55           0 :   (void)tcsetattr( STDIN_FILENO, TCSANOW, &termios_backup );
      56           0 : }
      57             : 
      58             : fd_topo_run_tile_t
      59             : fdctl_tile_run( fd_topo_tile_t const * tile );
      60             : 
      61             : /* repair_topo is a subset of "src/app/firedancer/topology.c" at commit
      62             :    0d8386f4f305bb15329813cfe4a40c3594249e96, slightly modified to work
      63             :    as a repair catchup.  TODO ideally, one should invoke the firedancer
      64             :    topology first, and exclude the parts that are not needed, instead of
      65             :    manually generating new topologies for every command.  This would
      66             :    also guarantee that the catchup is replicating (as close as possible)
      67             :    the full topology. */
      68             : static void
      69           0 : repair_topo( config_t * config ) {
      70           0 :   resolve_gossip_entrypoints( config );
      71             : 
      72           0 :   ulong net_tile_cnt    = config->layout.net_tile_count;
      73           0 :   ulong shred_tile_cnt  = config->layout.shred_tile_count;
      74           0 :   ulong quic_tile_cnt   = config->layout.quic_tile_count;
      75           0 :   ulong sign_tile_cnt   = config->firedancer.layout.sign_tile_count;
      76           0 :   ulong gossvf_tile_cnt = config->firedancer.layout.gossvf_tile_count;
      77             : 
      78           0 :   fd_topo_t * topo = fd_topob_new( &config->topo, config->name );
      79           0 :   topo->max_page_size = fd_cstr_to_shmem_page_sz( config->hugetlbfs.max_page_size );
      80           0 :   topo->gigantic_page_threshold = config->hugetlbfs.gigantic_page_threshold_mib << 20;
      81             : 
      82           0 :   ulong tile_to_cpu[ FD_TILE_MAX ] = {0};
      83           0 :   ushort parsed_tile_to_cpu[ FD_TILE_MAX ];
      84             :   /* Unassigned tiles will be floating, unless auto topology is enabled. */
      85           0 :   for( ulong i=0UL; i<FD_TILE_MAX; i++ ) parsed_tile_to_cpu[ i ] = USHORT_MAX;
      86             : 
      87           0 :   int is_auto_affinity = !strcmp( config->layout.affinity, "auto" );
      88           0 :   int is_bench_auto_affinity = !strcmp( config->development.bench.affinity, "auto" );
      89             : 
      90           0 :   if( FD_UNLIKELY( is_auto_affinity != is_bench_auto_affinity ) ) {
      91           0 :     FD_LOG_ERR(( "The CPU affinity string in the configuration file under [layout.affinity] and [development.bench.affinity] must all be set to 'auto' or all be set to a specific CPU affinity string." ));
      92           0 :   }
      93             : 
      94           0 :   fd_topo_cpus_t cpus[1];
      95           0 :   fd_topo_cpus_init( cpus );
      96             : 
      97           0 :   ulong affinity_tile_cnt = 0UL;
      98           0 :   if( FD_LIKELY( !is_auto_affinity ) ) affinity_tile_cnt = fd_tile_private_cpus_parse( config->layout.affinity, parsed_tile_to_cpu );
      99             : 
     100           0 :   for( ulong i=0UL; i<affinity_tile_cnt; i++ ) {
     101           0 :     if( FD_UNLIKELY( parsed_tile_to_cpu[ i ]!=USHORT_MAX && parsed_tile_to_cpu[ i ]>=cpus->cpu_cnt ) )
     102           0 :       FD_LOG_ERR(( "The CPU affinity string in the configuration file under [layout.affinity] specifies a CPU index of %hu, but the system "
     103           0 :                   "only has %lu CPUs. You should either change the CPU allocations in the affinity string, or increase the number of CPUs "
     104           0 :                   "in the system.",
     105           0 :                   parsed_tile_to_cpu[ i ], cpus->cpu_cnt ));
     106           0 :     tile_to_cpu[ i ] = fd_ulong_if( parsed_tile_to_cpu[ i ]==USHORT_MAX, ULONG_MAX, (ulong)parsed_tile_to_cpu[ i ] );
     107           0 :   }
     108             : 
     109           0 :   fd_core_subtopo(   config, tile_to_cpu );
     110           0 :   fd_gossip_subtopo( config, tile_to_cpu );
     111             : 
     112             :   /*             topo, name */
     113           0 :   fd_topob_wksp( topo, "net_shred"    );
     114           0 :   fd_topob_wksp( topo, "net_repair"   );
     115           0 :   fd_topob_wksp( topo, "net_quic"     );
     116             : 
     117           0 :   fd_topob_wksp( topo, "shred_out"    );
     118           0 :   fd_topob_wksp( topo, "replay_stake" );
     119             : 
     120           0 :   fd_topob_wksp( topo, "poh_shred"    );
     121             : 
     122           0 :   fd_topob_wksp( topo, "shred_sign"   );
     123           0 :   fd_topob_wksp( topo, "sign_shred"   );
     124             : 
     125           0 :   fd_topob_wksp( topo, "repair_sign"  );
     126           0 :   fd_topob_wksp( topo, "sign_repair"  );
     127             : 
     128           0 :   fd_topob_wksp( topo, "send_out"     );
     129             : 
     130           0 :   fd_topob_wksp( topo, "shred"        );
     131           0 :   fd_topob_wksp( topo, "repair"       );
     132           0 :   fd_topob_wksp( topo, "fec_sets"     );
     133           0 :   fd_topob_wksp( topo, "snapin_manif" );
     134             : 
      135           0 :   fd_topob_wksp( topo, "slot_fseqs"   ); /* fseqs for marked slots, e.g. turbine slot */
     136           0 :   fd_topob_wksp( topo, "genesi_out"   ); /* mock genesi_out for ipecho */
     137             : 
     138           0 :   #define FOR(cnt) for( ulong i=0UL; i<cnt; i++ )
     139             : 
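                      :   /* Depth shared by the shred_out and repair_shred links below: one entry
                      :      per data shred of every pending FEC set, rounded up to a power of two
                      :      and capped at the 65536-frag dcache limit. */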
     140           0 :   ulong pending_fec_shreds_depth = fd_ulong_min( fd_ulong_pow2_up( config->tiles.shred.max_pending_shred_sets * FD_REEDSOL_DATA_SHREDS_MAX ), USHORT_MAX + 1 /* dcache max */ );
     141             : 
     142             :   /*                                  topo, link_name,      wksp_name,      depth,                                    mtu,                           burst */
     143           0 :   FOR(quic_tile_cnt)   fd_topob_link( topo, "quic_net",     "net_quic",     config->net.ingress_buffer_size,          FD_NET_MTU,                    1UL );
     144           0 :   FOR(shred_tile_cnt)  fd_topob_link( topo, "shred_net",    "net_shred",    config->net.ingress_buffer_size,          FD_NET_MTU,                    1UL );
     145             : 
     146             :   /**/                 fd_topob_link( topo, "replay_stake", "replay_stake", 128UL,                                    40UL + 40200UL * 40UL,         1UL );
     147             : 
     148           0 :   FOR(shred_tile_cnt)  fd_topob_link( topo, "shred_sign",   "shred_sign",   128UL,                                    32UL,                          1UL );
     149           0 :   FOR(shred_tile_cnt)  fd_topob_link( topo, "sign_shred",   "sign_shred",   128UL,                                    64UL,                          1UL );
     150             : 
     151           0 :   /**/                 fd_topob_link( topo, "repair_net",   "net_repair",   config->net.ingress_buffer_size,          FD_NET_MTU,                    1UL );
     152             : 
     153           0 :   FOR(shred_tile_cnt)  fd_topob_link( topo, "shred_out",    "shred_out",    pending_fec_shreds_depth,                 FD_SHRED_OUT_MTU,              2UL /* at most 2 msgs per after_frag */ );
     154             : 
     155           0 :   FOR(shred_tile_cnt)  fd_topob_link( topo, "repair_shred", "shred_out",    pending_fec_shreds_depth,                 sizeof(fd_ed25519_sig_t),      1UL );
     156             : 
     157           0 :   FOR(sign_tile_cnt-1) fd_topob_link( topo, "repair_sign",  "repair_sign",  128UL,                                    FD_REPAIR_MAX_PREIMAGE_SZ,     1UL );
     158           0 :   FOR(sign_tile_cnt-1) fd_topob_link( topo, "sign_repair",  "sign_repair",  128UL,                                    sizeof(fd_ed25519_sig_t),      1UL );
     159             : 
     160           0 :   /**/                 fd_topob_link( topo, "poh_shred",    "poh_shred",    16384UL,                                  USHORT_MAX,                    1UL );
     161             : 
     162           0 :   /**/                 fd_topob_link( topo, "send_out",     "send_out",     128UL,                                    FD_TXN_MTU,                    1UL );
     163             : 
     164             :   /**/                 fd_topob_link( topo, "snapin_manif", "snapin_manif", 2UL,                                      sizeof(fd_snapshot_manifest_t), 1UL );
     165             : 
      166             :   /**/                 fd_topob_link( topo, "genesi_out",   "genesi_out",   2UL,                                      128UL,                         1UL );
     167             : 
     168           0 :   FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_repair", i, config->net.ingress_buffer_size );
     169           0 :   FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_quic",   i, config->net.ingress_buffer_size );
     170           0 :   FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_shred",  i, config->net.ingress_buffer_size );
     171             : 
     172             :   /*                                              topo, tile_name, tile_wksp, metrics_wksp, cpu_idx,                       is_agave, uses_keyswitch */
     173           0 :   FOR(shred_tile_cnt)              fd_topob_tile( topo, "shred",   "shred",   "metric_in",  tile_to_cpu[ topo->tile_cnt ], 0,        1 );
     174           0 :   fd_topo_tile_t * repair_tile =   fd_topob_tile( topo, "repair",  "repair",  "metric_in",  tile_to_cpu[ topo->tile_cnt ], 0,        0 );
     175             : 
     176             :   /* Setup a shared wksp object for fec sets. */
     177             : 
     178           0 :   ulong shred_depth = 65536UL; /* from fdctl/topology.c shred_store link. MAKE SURE TO KEEP IN SYNC. */
     179           0 :   ulong fec_set_cnt = shred_depth + config->tiles.shred.max_pending_shred_sets + 4UL;
      180           0 :   ulong fec_sets_sz = fec_set_cnt*sizeof(fd_shred34_t)*4; /* mirrors # of dcache entries in frankendancer */
     181           0 :   fd_topo_obj_t * fec_sets_obj = setup_topo_fec_sets( topo, "fec_sets", shred_tile_cnt*fec_sets_sz );
     182           0 :   for( ulong i=0UL; i<shred_tile_cnt; i++ ) {
     183           0 :     fd_topo_tile_t * shred_tile = &topo->tiles[ fd_topo_find_tile( topo, "shred", i ) ];
     184           0 :     fd_topob_tile_uses( topo, shred_tile,  fec_sets_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
     185           0 :   }
     186           0 :   fd_topob_tile_uses( topo, repair_tile, fec_sets_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
     187           0 :   FD_TEST( fd_pod_insertf_ulong( topo->props, fec_sets_obj->id, "fec_sets" ) );
     188             : 
     189             :   /* There's another special fseq that's used to communicate the shred
     190             :     version from the Agave boot path to the shred tile. */
     191           0 :   fd_topo_obj_t * poh_shred_obj = fd_topob_obj( topo, "fseq", "poh_shred" );
     192           0 :   fd_topo_tile_t * poh_tile = &topo->tiles[ fd_topo_find_tile( topo, "gossip", 0UL ) ];
     193           0 :   fd_topob_tile_uses( topo, poh_tile, poh_shred_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
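                      :   /* In this reduced topology the gossip tile (named poh_tile above only for
                      :      parity with the full topology) is registered as the writer of the
                      :      poh_shred fseq. */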
     194             : 
     195             :   /* root_slot is an fseq marking the validator's current Tower root. */
     196             : 
     197           0 :   fd_topo_obj_t * root_slot_obj = fd_topob_obj( topo, "fseq", "slot_fseqs" );
     198           0 :   FD_TEST( fd_pod_insertf_ulong( topo->props, root_slot_obj->id, "root_slot" ) );
     199             : 
     200           0 :   for( ulong i=0UL; i<shred_tile_cnt; i++ ) {
     201           0 :     fd_topo_tile_t * shred_tile = &topo->tiles[ fd_topo_find_tile( topo, "shred", i ) ];
     202           0 :     fd_topob_tile_uses( topo, shred_tile, poh_shred_obj, FD_SHMEM_JOIN_MODE_READ_ONLY );
     203           0 :   }
     204           0 :   FD_TEST( fd_pod_insertf_ulong( topo->props, poh_shred_obj->id, "poh_shred" ) );
     205             : 
     206           0 :   if( FD_LIKELY( !is_auto_affinity ) ) {
     207           0 :     if( FD_UNLIKELY( affinity_tile_cnt<topo->tile_cnt ) )
     208           0 :       FD_LOG_ERR(( "The topology you are using has %lu tiles, but the CPU affinity specified in the config tile as [layout.affinity] only provides for %lu cores. "
     209           0 :                   "You should either increase the number of cores dedicated to Firedancer in the affinity string, or decrease the number of cores needed by reducing "
     210           0 :                   "the total tile count. You can reduce the tile count by decreasing individual tile counts in the [layout] section of the configuration file.",
     211           0 :                   topo->tile_cnt, affinity_tile_cnt ));
     212           0 :     if( FD_UNLIKELY( affinity_tile_cnt>topo->tile_cnt ) )
     213           0 :       FD_LOG_WARNING(( "The topology you are using has %lu tiles, but the CPU affinity specified in the config tile as [layout.affinity] provides for %lu cores. "
     214           0 :                       "Not all cores in the affinity will be used by Firedancer. You may wish to increase the number of tiles in the system by increasing "
     215           0 :                       "individual tile counts in the [layout] section of the configuration file.",
     216           0 :                       topo->tile_cnt, affinity_tile_cnt ));
     217           0 :   }
     218             : 
     219             :   /*                                      topo, tile_name, tile_kind_id, fseq_wksp,   link_name,      link_kind_id, reliable,            polled */
     220           0 :   for( ulong j=0UL; j<shred_tile_cnt; j++ )
     221           0 :                   fd_topos_tile_in_net(  topo,                          "metric_in", "shred_net",    j,            FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
     222           0 :   for( ulong j=0UL; j<quic_tile_cnt; j++ )
      223           0 :                   fd_topos_tile_in_net(  topo,                          "metric_in", "quic_net",     j,            FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
     224             : 
     225           0 :   /**/            fd_topob_tile_in(      topo, "gossip",  0UL,         "metric_in", "send_out",      0UL,           FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED );
     226             : 
     227           0 :   /**/            fd_topos_tile_in_net(  topo,                          "metric_in", "repair_net",   0UL,          FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
     228             : 
     229           0 :   FOR(shred_tile_cnt) for( ulong j=0UL; j<net_tile_cnt; j++ )
     230           0 :                        fd_topob_tile_in(  topo, "shred",  i,             "metric_in", "net_shred",     j,            FD_TOPOB_UNRELIABLE,   FD_TOPOB_POLLED ); /* No reliable consumers of networking fragments, may be dropped or overrun */
     231           0 :   FOR(shred_tile_cnt)  fd_topob_tile_in(  topo, "shred",  i,             "metric_in", "poh_shred",     0UL,          FD_TOPOB_RELIABLE,     FD_TOPOB_POLLED );
     232           0 :   FOR(shred_tile_cnt)  fd_topob_tile_in(  topo, "shred",  i,             "metric_in", "replay_stake",  0UL,          FD_TOPOB_RELIABLE,     FD_TOPOB_POLLED );
     233           0 :   FOR(shred_tile_cnt)  fd_topob_tile_in(  topo, "shred",  i,             "metric_in", "gossip_out",    0UL,          FD_TOPOB_RELIABLE,     FD_TOPOB_POLLED );
     234           0 :   FOR(shred_tile_cnt)  fd_topob_tile_out( topo, "shred",  i,                          "shred_out",     i                                                    );
     235           0 :   FOR(shred_tile_cnt)  fd_topob_tile_out( topo, "shred",  i,                          "shred_net",     i                                                    );
     236           0 :   FOR(shred_tile_cnt)  fd_topob_tile_in ( topo, "shred",  i,             "metric_in", "ipecho_out",    0UL,          FD_TOPOB_RELIABLE,     FD_TOPOB_POLLED );
     237           0 :   FOR(shred_tile_cnt)  fd_topob_tile_in(  topo, "shred",  i,             "metric_in",  "repair_shred", i,            FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     238             : 
     239             :   /**/                 fd_topob_tile_out( topo, "repair",  0UL,                       "repair_net",    0UL                                                  );
     240             : 
     241             :   /* Sign links don't need to be reliable because they are synchronous,
     242             :     so there's at most one fragment in flight at a time anyway.  The
     243             :     sign links are also not polled by the mux, instead the tiles will
     244             :     read the sign responses out of band in a dedicated spin loop. */
     245           0 :   for( ulong i=0UL; i<shred_tile_cnt; i++ ) {
     246           0 :     /**/               fd_topob_tile_in(  topo, "sign",   0UL,           "metric_in", "shred_sign",    i,            FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED   );
     247           0 :     /**/               fd_topob_tile_out( topo, "shred",  i,                          "shred_sign",    i                                                    );
     248           0 :     /**/               fd_topob_tile_in(  topo, "shred",  i,             "metric_in", "sign_shred",    i,            FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
     249           0 :     /**/               fd_topob_tile_out( topo, "sign",   0UL,                        "sign_shred",    i                                                    );
     250           0 :   }
     251           0 :   FOR(gossvf_tile_cnt) fd_topob_tile_in ( topo, "gossvf",   i,            "metric_in", "replay_stake", 0UL,          FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     252             : 
     253           0 :   /**/                 fd_topob_tile_in ( topo, "gossip",   0UL,          "metric_in", "replay_stake", 0UL,          FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     254             : 
     255           0 :   FOR(net_tile_cnt)    fd_topob_tile_in(  topo, "repair",  0UL,          "metric_in", "net_repair",    i,            FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED   ); /* No reliable consumers of networking fragments, may be dropped or overrun */
     256           0 :   /**/                 fd_topob_tile_in(  topo, "repair",  0UL,          "metric_in", "gossip_out",    0UL,          FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     257           0 :   /**/                 fd_topob_tile_in(  topo, "repair",  0UL,          "metric_in", "replay_stake",  0UL,          FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED   );
     258           0 :                        fd_topob_tile_in(  topo, "repair",  0UL,          "metric_in", "snapin_manif",  0UL,          FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     259           0 :   FOR(shred_tile_cnt)  fd_topob_tile_in(  topo, "repair",  0UL,          "metric_in", "shred_out",     i,            FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     260           0 :   FOR(shred_tile_cnt)  fd_topob_tile_out( topo, "repair", 0UL,                        "repair_shred",  i                                                    );
     261           0 :   FOR(sign_tile_cnt-1) fd_topob_tile_out( topo, "repair", 0UL,                        "repair_sign",   i                                                    );
     262           0 :   FOR(sign_tile_cnt-1) fd_topob_tile_in ( topo, "sign",   i+1,           "metric_in", "repair_sign",   i,            FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     263           0 :   FOR(sign_tile_cnt-1) fd_topob_tile_out( topo, "sign",   i+1,                        "sign_repair",   i                                                    );
     264           0 :   FOR(sign_tile_cnt-1) fd_topob_tile_in ( topo, "repair", 0UL,           "metric_in", "sign_repair",   i,            FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED   );
     265             : 
     266           0 :   /**/                 fd_topob_tile_in ( topo, "gossip", 0UL,           "metric_in", "sign_gossip",   0UL,          FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
     267           0 :   /**/                 fd_topob_tile_in ( topo, "ipecho", 0UL,           "metric_in", "genesi_out",    0UL,          FD_TOPOB_RELIABLE,   FD_TOPOB_POLLED   );
     268             : 
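                      :   /* The shredcap (scap) tile is always included here: it effectively stands
                      :      in for the replay/snapshot path, publishing stake weights on
                      :      replay_stake and the snapshot manifest on snapin_manif, while tapping
                      :      repair_net, net_shred, shred_out and gossip_out for capture. */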
     269           0 :   if( 1 ) {
     270           0 :     fd_topob_wksp( topo, "scap" );
     271             : 
     272           0 :     fd_topo_tile_t * scap_tile = fd_topob_tile( topo, "scap", "scap", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
     273             : 
     274           0 :     fd_topob_tile_in(  topo, "scap", 0UL, "metric_in", "repair_net", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
     275           0 :     for( ulong j=0UL; j<net_tile_cnt; j++ ) {
     276           0 :       fd_topob_tile_in(  topo, "scap", 0UL, "metric_in", "net_shred", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
     277           0 :     }
     278           0 :     for( ulong j=0UL; j<shred_tile_cnt; j++ ) {
     279           0 :       fd_topob_tile_in(  topo, "scap", 0UL, "metric_in", "shred_out", j, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
     280           0 :     }
     281           0 :     fd_topob_tile_in( topo, "scap", 0UL, "metric_in", "gossip_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
     282             : 
     283           0 :     fd_topob_tile_uses( topo, scap_tile, root_slot_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
     284           0 :     fd_topob_tile_out( topo, "scap", 0UL, "replay_stake", 0UL );
     285           0 :     fd_topob_tile_out( topo, "scap", 0UL, "snapin_manif", 0UL );
     286           0 :   }
     287             : 
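                      :   /* Some links in this reduced topology intentionally have no producer or no
                      :      consumer; mark them as permitted and assert the expected counts. */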
     288           0 :   FD_TEST( fd_link_permit_no_producers( topo, "quic_net"     ) == quic_tile_cnt );
     289           0 :   FD_TEST( fd_link_permit_no_producers( topo, "poh_shred"    ) == 1UL           );
     290           0 :   FD_TEST( fd_link_permit_no_producers( topo, "send_out"     ) == 1UL           );
     291           0 :   FD_TEST( fd_link_permit_no_producers( topo, "genesi_out"   ) == 1UL           );
     292           0 :   FD_TEST( fd_link_permit_no_consumers( topo, "net_quic"     ) == net_tile_cnt );
     293             : 
     294           0 :   config->tiles.send.send_src_port = 0; /* disable send */
     295             : 
     296           0 :   FOR(net_tile_cnt) fd_topos_net_tile_finish( topo, i );
     297             : 
     298           0 :   for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     299           0 :     fd_topo_tile_t * tile = &topo->tiles[ i ];
     300           0 :     fd_topo_configure_tile( tile, config );
     301           0 :   }
     302             : 
     303           0 :   if( FD_UNLIKELY( is_auto_affinity ) ) fd_topob_auto_layout( topo, 0 );
     304             : 
     305           0 :   fd_topob_finish( topo, CALLBACKS );
     306             : 
     307           0 :   config->topo = *topo;
     308           0 : }
     309             : 
     310             : extern int * fd_log_private_shared_lock;
     311             : 
     312             : static char *
     313           0 : fmt_count( char buf[ static 64 ], ulong count ) {
     314           0 :   char tmp[ 64 ];
     315           0 :   if( FD_LIKELY( count<1000UL ) ) FD_TEST( fd_cstr_printf_check( tmp, 64UL, NULL, "%lu", count ) );
     316           0 :   else if( FD_LIKELY( count<1000000UL ) ) FD_TEST( fd_cstr_printf_check( tmp, 64UL, NULL, "%.1f K", (double)count/1000.0 ) );
      317           0 :   else if( FD_LIKELY( count<1000000000UL ) ) FD_TEST( fd_cstr_printf_check( tmp, 64UL, NULL, "%.1f M", (double)count/1000000.0 ) );
                      :   else FD_TEST( fd_cstr_printf_check( tmp, 64UL, NULL, "%.1f B", (double)count/1000000000.0 ) ); /* otherwise tmp would be read uninitialized for counts >= 1e9 */
     318             : 
     319           0 :   FD_TEST( fd_cstr_printf_check( buf, 64UL, NULL, "%12s", tmp ) );
     320           0 :   return buf;
     321           0 : }
     322             : 
     323             : static void
     324             : print_histogram_buckets( volatile ulong * metrics,
     325             :                          ulong offset,
     326             :                          int converter,
     327             :                          double histmin,
     328             :                          double histmax,
     329           0 :                          char * title ) {
     330           0 :   fd_histf_t hist[1];
     331             : 
     332             :   /* Create histogram structure only to get bucket edges for display */
     333           0 :   if( FD_LIKELY( converter == FD_METRICS_CONVERTER_SECONDS ) ) {
     334             :     /* For SLOT_COMPLETE_TIME: min=0.2, max=2.0 seconds */
     335           0 :     FD_TEST( fd_histf_new( hist, fd_metrics_convert_seconds_to_ticks( histmin ), fd_metrics_convert_seconds_to_ticks( histmax ) ) );
     336           0 :   } else if( FD_LIKELY( converter == FD_METRICS_CONVERTER_NONE ) ) {
     337             :     /* For non-time histograms, we'd need the actual min/max values */
     338           0 :     FD_TEST( fd_histf_new( hist, (ulong)histmin, (ulong)histmax ) );
     339           0 :   } else {
     340           0 :     FD_LOG_ERR(( "unknown converter %i", converter ));
     341           0 :   }
     342             : 
     343           0 :   printf( " +---------------------+--------------------+--------------+\n" );
     344           0 :   printf( " | %-19s |                    | Count        |\n", title );
     345           0 :   printf( " +---------------------+--------------------+--------------+\n" );
     346             : 
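                      :   /* Sum all bucket counts up front so the ASCII bars below can be scaled
                      :      relative to the total. */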
     347           0 :   ulong total_count = 0;
     348           0 :   for( ulong k = 0; k < FD_HISTF_BUCKET_CNT; k++ ) {
     349           0 :     ulong bucket_count = metrics[ offset + k ];
     350           0 :     total_count += bucket_count;
     351           0 :   }
     352             : 
     353           0 :   for( ulong k = 0; k < FD_HISTF_BUCKET_CNT; k++ ) {
     354             :     /* Get individual bucket count directly from metrics array */
     355           0 :     ulong bucket_count = metrics[ offset + k ];
     356             : 
     357           0 :     char * le_str;
     358           0 :     char le_buf[ 64 ];
     359           0 :     if( FD_UNLIKELY( k == FD_HISTF_BUCKET_CNT - 1UL ) ) {
     360           0 :       le_str = "+Inf";
     361           0 :     } else {
     362           0 :       ulong edge = fd_histf_right( hist, k );
     363           0 :       if( FD_LIKELY( converter == FD_METRICS_CONVERTER_SECONDS ) ) {
     364           0 :         double edgef = fd_metrics_convert_ticks_to_seconds( edge - 1 );
     365           0 :         FD_TEST( fd_cstr_printf_check( le_buf, sizeof( le_buf ), NULL, "%.3f", edgef ) );
     366           0 :       } else {
     367           0 :         FD_TEST( fd_cstr_printf_check( le_buf, sizeof( le_buf ), NULL, "%.3f", (double)(edge - 1) / 1000000.0 ) );
     368           0 :       }
     369           0 :       le_str = le_buf;
     370           0 :     }
     371             : 
     372           0 :     char count_buf[ 64 ];
     373           0 :     fmt_count( count_buf, bucket_count );
     374             : 
     375             :     /* Create visual bar - scale to max 20 characters */
     376           0 :     char bar_buf[ 22 ];
     377           0 :     if( bucket_count > 0 && total_count > 0 ) {
      378           0 :       ulong bar_length = (bucket_count * 20UL) / total_count; /* cap at 20 so the terminating '\0' always fits in bar_buf */
     379           0 :       if( bar_length == 0 ) bar_length = 1;
     380           0 :       for( ulong i = 0; i < bar_length; i++ ) { bar_buf[ i ] = '|'; }
     381           0 :       bar_buf[ bar_length ] = '\0';
     382           0 :     } else {
     383           0 :       bar_buf[ 0 ] = '\0';
     384           0 :     }
     385             : 
     386           0 :     printf( " | %-19s | %-18s | %s |\n", le_str, bar_buf, count_buf );
     387           0 :   }
     388             : 
     389             :   /* Print sum and total count */
     390           0 :   char sum_buf[ 64 ];
      391           0 :   char avg_buf[ 64 ] = "-"; /* only overwritten for the seconds converter below */
     392           0 :   if( FD_LIKELY( converter == FD_METRICS_CONVERTER_SECONDS ) ) {
     393           0 :     double sumf = fd_metrics_convert_ticks_to_seconds( metrics[ offset + FD_HISTF_BUCKET_CNT ] );
     394           0 :     FD_TEST( fd_cstr_printf_check( sum_buf, sizeof( sum_buf ), NULL, "%.6f", sumf ) );
     395           0 :     double avg = sumf / (double)total_count;
     396           0 :     FD_TEST( fd_cstr_printf_check( avg_buf, sizeof( avg_buf ), NULL, "%.6f", avg ) );
     397           0 :   } else {
     398           0 :     FD_TEST( fd_cstr_printf_check( sum_buf, sizeof( sum_buf ), NULL, "%lu", metrics[ offset + FD_HISTF_BUCKET_CNT ] ));
     399           0 :   }
     400             : 
     401           0 :   printf( " +---------------------+--------------------+---------------+\n" );
     402           0 :   printf( " | Sum: %-14s | Count: %-11lu | Avg: %-8s |\n", sum_buf, total_count, avg_buf );
     403           0 :   printf( " +---------------------+--------------------+---------------+\n" );
     404           0 : }
     405             : 
     406             : static fd_slot_metrics_t temp_slots[ FD_CATCHUP_METRICS_MAX ];
     407             : 
     408             : static void
     409           0 : print_catchup_slots( fd_wksp_t * repair_tile_wksp, ctx_t * repair_ctx, int verbose, int sort_by_slot ) {
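                      :   /* The repair tile stores pointers that are only valid in its own address
                      :      space; convert them to workspace offsets (gaddr) via the tile's wksp join
                      :      and back to local addresses via this process's join before reading. */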
     410           0 :   fd_repair_metrics_t * catchup = repair_ctx->slot_metrics;
     411           0 :   ulong catchup_gaddr = fd_wksp_gaddr_fast( repair_ctx->wksp, catchup );
     412           0 :   fd_repair_metrics_t * catchup_table = (fd_repair_metrics_t *)fd_wksp_laddr( repair_tile_wksp, catchup_gaddr );
     413           0 :   if( FD_LIKELY( sort_by_slot ) ) {
     414           0 :     fd_repair_metrics_print_sorted( catchup_table, verbose, temp_slots );
     415           0 :   } else {
     416           0 :     fd_repair_metrics_print( catchup_table, verbose );
     417           0 :   }
     418           0 : }
     419             : 
     420             : static fd_location_info_t * location_table;
     421             : static fd_pubkey_t peers_copy[ FD_ACTIVE_KEY_MAX ];
     422             : 
     423             : static ulong
     424           0 : sort_peers_by_latency( fd_policy_peer_t * active_table, fd_peer_dlist_t * peers_dlist, fd_peer_dlist_t * peers_wlist, fd_peer_t * peers_arr ) {
     425           0 :   ulong i = 0;
     426           0 :   fd_peer_dlist_iter_t iter = fd_peer_dlist_iter_fwd_init( peers_dlist, peers_arr );
     427           0 :   while( !fd_peer_dlist_iter_done( iter, peers_dlist, peers_arr ) ) {
     428           0 :     fd_peer_t * peer = fd_peer_dlist_iter_ele( iter, peers_dlist, peers_arr );
     429           0 :     if( FD_UNLIKELY( !peer ) ) break;
     430           0 :     peers_copy[ i++ ] = peer->identity;
     431           0 :     if( FD_UNLIKELY( i >= FD_ACTIVE_KEY_MAX ) ) break;
     432           0 :     iter = fd_peer_dlist_iter_fwd_next( iter, peers_dlist, peers_arr );
     433           0 :   }
     434           0 :   ulong fast_cnt = i;
     435           0 :   iter = fd_peer_dlist_iter_fwd_init( peers_wlist, peers_arr );
     436           0 :   while( !fd_peer_dlist_iter_done( iter, peers_wlist, peers_arr ) ) {
     437           0 :     fd_peer_t * peer = fd_peer_dlist_iter_ele( iter, peers_wlist, peers_arr );
     438           0 :     if( FD_UNLIKELY( !peer ) ) break;
     439           0 :     peers_copy[ i++ ] = peer->identity;
     440           0 :     if( FD_UNLIKELY( i >= FD_ACTIVE_KEY_MAX ) ) break;
     441           0 :     iter = fd_peer_dlist_iter_fwd_next( iter, peers_wlist, peers_arr );
     442           0 :   }
     443           0 :   FD_LOG_NOTICE(( "Fast peers cnt: %lu. Slow peers cnt: %lu.", fast_cnt, i - fast_cnt ));
     444             : 
     445           0 :   ulong peer_cnt = i;
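                      :   /* Bubble sort peers_copy by average response latency (total_lat/res_cnt),
                      :      ascending; peers that never responded are treated as maximally slow. */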
      446           0 :   for( uint i = 0; i + 1UL < peer_cnt; i++ ) { /* i+1<peer_cnt avoids underflow when there are no peers */
     447           0 :     int swapped = 0;
     448           0 :     for( uint j = 0; j < peer_cnt - 1 - i; j++ ) {
     449           0 :       fd_policy_peer_t const * active_j  = fd_policy_peer_map_query( active_table, peers_copy[ j ], NULL );
     450           0 :       fd_policy_peer_t const * active_j1 = fd_policy_peer_map_query( active_table, peers_copy[ j + 1 ], NULL );
     451             : 
     452             :       /* Skip peers with no responses */
     453           0 :       double latency_j  = 10e9;
     454           0 :       double latency_j1 = 10e9;
     455           0 :       if( FD_LIKELY( active_j  && active_j->res_cnt > 0  ) ) latency_j  = ((double)active_j->total_lat / (double)active_j->res_cnt);
     456           0 :       if( FD_LIKELY( active_j1 && active_j1->res_cnt > 0 ) ) latency_j1 = ((double)active_j1->total_lat / (double)active_j1->res_cnt);
     457             : 
     458             :       /* Swap if j has higher latency than j+1 */
     459           0 :       if( latency_j > latency_j1 ) {
     460           0 :         fd_pubkey_t temp    = peers_copy[ j ];
     461           0 :         peers_copy[ j ]     = peers_copy[ j + 1 ];
     462           0 :         peers_copy[ j + 1 ] = temp;
     463           0 :         swapped             = 1;
     464           0 :       }
     465           0 :     }
     466           0 :     if( !swapped ) break;
     467           0 :   }
     468           0 :   return peer_cnt;
     469           0 : }
     470             : 
     471             : static void
     472           0 : print_peer_location_latency( fd_wksp_t * repair_tile_wksp, ctx_t * tile_ctx ) {
     473           0 :   ulong              policy_gaddr  = fd_wksp_gaddr_fast( tile_ctx->wksp, tile_ctx->policy );
     474           0 :   fd_policy_t *      policy        = fd_wksp_laddr     ( repair_tile_wksp, policy_gaddr );
     475           0 :   ulong              peermap_gaddr = fd_wksp_gaddr_fast( tile_ctx->wksp, policy->peers.map  );
     476           0 :   ulong              peerarr_gaddr = fd_wksp_gaddr_fast( tile_ctx->wksp, policy->peers.pool );
     477           0 :   ulong              peerlst_gaddr = fd_wksp_gaddr_fast( tile_ctx->wksp, policy->peers.fast );
     478           0 :   ulong              peerwst_gaddr = fd_wksp_gaddr_fast( tile_ctx->wksp, policy->peers.slow );
     479           0 :   fd_policy_peer_t * peers_map     = (fd_policy_peer_t *)fd_wksp_laddr( repair_tile_wksp, peermap_gaddr );
     480           0 :   fd_peer_dlist_t *  peers_dlist   = (fd_peer_dlist_t *) fd_wksp_laddr( repair_tile_wksp, peerlst_gaddr );
     481           0 :   fd_peer_dlist_t *  peers_wlist   = (fd_peer_dlist_t *) fd_wksp_laddr( repair_tile_wksp, peerwst_gaddr );
     482           0 :   fd_peer_t *        peers_arr     = (fd_peer_t *)       fd_wksp_laddr( repair_tile_wksp, peerarr_gaddr );
     483             : 
     484           0 :   ulong peer_cnt = sort_peers_by_latency( peers_map, peers_dlist, peers_wlist, peers_arr );
     485           0 :   printf("\nPeer Location/Latency Information\n");
     486           0 :   printf( "| %-46s | %-7s | %-8s | %-8s | %-7s | %12s | %s\n", "Pubkey", "Req Cnt", "Req B/s", "Rx B/s", "Rx Rate", "Avg Latency", "Location Info" );
     487           0 :   for( uint i = 0; i < peer_cnt; i++ ) {
     488           0 :     fd_policy_peer_t const * active = fd_policy_peer_map_query( peers_map, peers_copy[ i ], NULL );
     489           0 :     if( FD_LIKELY( active && active->res_cnt > 0 ) ) {
     490           0 :       fd_location_info_t * info = fd_location_table_query( location_table, active->ip4, NULL );
     491           0 :       char * geolocation = info ? info->location : "Unknown";
     492           0 :       double peer_bps    = (double)(active->res_cnt * FD_SHRED_MIN_SZ) / ((double)(active->last_resp_ts - active->first_resp_ts) / 1e9);
     493           0 :       double req_bps     = (double)active->req_cnt * 202 / ((double)(active->last_req_ts - active->first_req_ts) / 1e9);
     494           0 :       printf( "%-5u | %-46s | %-7lu | %-8.2f | %-8.2f | %-7.2f | %10.3fms | %s\n", i, FD_BASE58_ENC_32_ALLOCA( &active->key ), active->req_cnt, req_bps, peer_bps, (double)active->res_cnt / (double)active->req_cnt, ((double)active->total_lat / (double)active->res_cnt) / 1e6, geolocation );
     495           0 :     }
     496           0 :   }
     497           0 :   printf("\n");
     498           0 :   fflush( stdout );
     499           0 : }
     500             : 
     501             : static void
     502           0 : read_iptable( char * iptable_path, fd_location_info_t * location_table ) {
     503           0 :   int iptable_fd = open( iptable_path, O_RDONLY );
     504           0 :   if( FD_UNLIKELY( iptable_fd<0 ) ) {
      505           0 :     FD_LOG_NOTICE(( "unable to open iptable file %s, peer locations will be reported as Unknown", iptable_path ));
     506           0 :     return;
     507           0 :   }
     508             : 
     509             :   /* read iptable line by line */
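                      :   /* Each line is expected to hold an IPv4 address encoded as a decimal ulong
                      :      (matching the peer map's ip4 key) followed by a free-form location string. */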
     510           0 :   if( FD_LIKELY( iptable_fd>=0 ) ) {
     511           0 :     char line[ 256 ];
     512           0 :     uchar istream_buf[256];
     513           0 :     fd_io_buffered_istream_t istream[1];
     514           0 :     fd_io_buffered_istream_init( istream, iptable_fd, istream_buf, sizeof(istream_buf) );
     515           0 :     for(;;) {
     516           0 :       int err;
     517           0 :       if( !fd_io_fgets( line, sizeof(line), istream, &err ) ) break;
     518           0 :       fd_location_info_t location_info;
      519           0 :       if( FD_UNLIKELY( sscanf( line, "%lu %127[^\n]", &location_info.ip4_addr, location_info.location )!=2 ) ) continue; /* bound the location string and skip malformed lines */
     520             :       //FD_LOG_NOTICE(( "inserting location info for ip4_addr %lu, location %s", location_info.ip4_addr, location_info.location ));
     521           0 :       fd_location_info_t * info = fd_location_table_insert( location_table, location_info.ip4_addr );
     522           0 :       if( FD_UNLIKELY( info==NULL ) ) break;
     523           0 :       memcpy( info->location, location_info.location, sizeof(info->location) );
     524           0 :     }
     525           0 :   }
     526           0 : }
     527             : 
     528             : static void
     529             : repair_ctx_wksp( args_t *          args,
     530             :                  config_t *        config,
     531             :                  ctx_t **          repair_ctx,
     532           0 :                  fd_topo_wksp_t ** repair_wksp ) {
     533           0 :   (void)args;
     534             : 
     535           0 :   fd_topo_t * topo = &config->topo;
     536           0 :   ulong wksp_id = fd_topo_find_wksp( topo, "repair" );
     537           0 :   if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair workspace not found" ));
     538             : 
     539           0 :   fd_topo_wksp_t * _repair_wksp = &topo->workspaces[ wksp_id ];
     540             : 
     541           0 :   ulong tile_id = fd_topo_find_tile( topo, "repair", 0UL );
     542           0 :   if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair tile not found" ));
     543             : 
     544           0 :   fd_topo_join_workspace( topo, _repair_wksp, FD_SHMEM_JOIN_MODE_READ_ONLY );
     545             : 
     546             :   /* Access the repair tile scratch memory where repair_tile_ctx is stored */
     547           0 :   fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
     548           0 :   void * scratch = fd_topo_obj_laddr( &config->topo, tile->tile_obj_id );
     549           0 :   if( FD_UNLIKELY( !scratch ) ) FD_LOG_ERR(( "Failed to access repair tile scratch memory" ));
     550             : 
     551           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     552           0 :   ctx_t * _repair_ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
     553             : 
     554           0 :   *repair_ctx  = _repair_ctx;
     555           0 :   *repair_wksp = _repair_wksp;
     556           0 : }
     557             : 
     558             : static void
     559             : repair_cmd_fn_catchup( args_t *   args,
     560           0 :                        config_t * config ) {
     561             : 
     562           0 :   memset( &config->topo, 0, sizeof(config->topo) );
     563           0 :   repair_topo( config );
     564             : 
     565           0 :   for( ulong i=0UL; i<config->topo.tile_cnt; i++ ) {
     566           0 :     fd_topo_tile_t * tile = &config->topo.tiles[ i ];
     567           0 :     if( FD_UNLIKELY( !strcmp( tile->name, "scap" ) ) ) {
     568             :       /* This is not part of the config, and it must be set manually
     569             :          on purpose as a safety mechanism. */
     570           0 :       tile->shredcap.enable_publish_stake_weights = 1;
     571           0 :       strncpy( tile->shredcap.manifest_path, args->repair.manifest_path, PATH_MAX );
     572           0 :     }
     573           0 :     if( FD_UNLIKELY( !strcmp( tile->name, "repair" ) ) ) {
     574           0 :       tile->repair.end_slot = args->repair.end_slot;
     575           0 :     }
     576           0 :   }
     577             : 
     578           0 :   FD_LOG_NOTICE(( "Repair catchup init" ));
     579           0 :   fd_topo_print_log( 1, &config->topo );
     580             : 
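                      :   /* Run every configure stage as `configure init` would, install the XDP
                      :      program when the xdp net provider is selected, then let
                      :      run_firedancer_init set up the workspaces before they are joined below. */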
     581           0 :   args_t configure_args = {
     582           0 :     .configure.command = CONFIGURE_CMD_INIT,
     583           0 :   };
     584           0 :   for( ulong i=0UL; STAGES[ i ]; i++ ) {
     585           0 :     configure_args.configure.stages[ i ] = STAGES[ i ];
     586           0 :   }
     587           0 :   configure_cmd_fn( &configure_args, config );
     588           0 :   if( 0==strcmp( config->net.provider, "xdp" ) ) {
     589           0 :     fd_topo_install_xdp_simple( &config->topo, config->net.bind_address_parsed );
     590           0 :   }
     591             : 
     592           0 :   run_firedancer_init( config, 1, 0 );
     593             : 
     594           0 :   fd_log_private_shared_lock[ 1 ] = 0;
     595           0 :   fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_WRITE );
     596             : 
     597           0 :   fd_topo_fill( &config->topo );
     598             : 
     599           0 :   ulong repair_tile_idx = fd_topo_find_tile( &config->topo, "repair", 0UL );
     600           0 :   FD_TEST( repair_tile_idx!=ULONG_MAX );
     601           0 :   fd_topo_tile_t * repair_tile = &config->topo.tiles[ repair_tile_idx ];
     602             : 
     603           0 :   ulong wksp_id = fd_topo_find_wksp( &config->topo, "repair" );
     604           0 :   if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair workspace not found" ));
     605           0 :   fd_topo_wksp_t * repair_wksp = &config->topo.workspaces[ wksp_id ];
     606           0 :   fd_topo_join_workspace( &config->topo, repair_wksp, FD_SHMEM_JOIN_MODE_READ_WRITE );
     607             : 
     608           0 :   ulong shred_tile_idx = fd_topo_find_tile( &config->topo, "shred", 0UL );
     609           0 :   FD_TEST( shred_tile_idx!=ULONG_MAX );
     610           0 :   fd_topo_tile_t * shred_tile = &config->topo.tiles[ shred_tile_idx ];
     611             : 
     612           0 :   volatile ulong * shred_metrics = fd_metrics_tile( shred_tile->metrics );
     613           0 :   FD_TEST( shred_metrics );
     614             : 
     615           0 :   volatile ulong * repair_metrics = fd_metrics_tile( repair_tile->metrics );
     616           0 :   FD_TEST( repair_metrics );
     617             : 
     618             :   /* Access the repair tile scratch memory where repair_tile_ctx is stored */
     619           0 :   void * scratch = fd_topo_obj_laddr( &config->topo, repair_tile->tile_obj_id );
     620           0 :   if( FD_UNLIKELY( !scratch ) ) FD_LOG_ERR(( "Failed to access repair tile scratch memory" ));
     621           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     622           0 :   ctx_t * repair_ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
     623             : 
     624             :   /* Collect all net tiles and their repair_net link metrics */
     625           0 :   ulong net_tile_cnt = config->layout.net_tile_count;
     626           0 :   volatile ulong ** repair_net_links = aligned_alloc( 8UL, net_tile_cnt * sizeof(volatile ulong*) );
     627           0 :   volatile ulong ** net_shred_links  = aligned_alloc( 8UL, net_tile_cnt * sizeof(volatile ulong*) );
     628           0 :   FD_TEST( repair_net_links );
     629           0 :   FD_TEST( net_shred_links );
     630             : 
     631           0 :   for( ulong i = 0UL; i < net_tile_cnt; i++ ) {
     632           0 :     ulong tile_idx = fd_topo_find_tile( &config->topo, "net", i );
     633           0 :     if( FD_UNLIKELY( tile_idx == ULONG_MAX ) ) FD_LOG_ERR(( "net tile %lu not found", i ));
     634           0 :     fd_topo_tile_t * tile = &config->topo.tiles[ tile_idx ];
     635             : 
     636           0 :     ulong repair_net_in_idx = fd_topo_find_tile_in_link( &config->topo, tile, "repair_net", 0UL );
     637           0 :     if( FD_UNLIKELY( repair_net_in_idx == ULONG_MAX ) ) {
     638           0 :       FD_LOG_ERR(( "repair_net link not found for net tile %lu", i ));
     639           0 :     }
     640           0 :     repair_net_links[i] = fd_metrics_link_in( tile->metrics, repair_net_in_idx );
     641           0 :     FD_TEST( repair_net_links[i] );
     642             : 
     643           0 :     ulong shred_tile_idx = fd_topo_find_tile( &config->topo, "shred", 0 );
     644           0 :     if( FD_UNLIKELY( shred_tile_idx == ULONG_MAX ) ) FD_LOG_ERR(( "shred tile 0 not found" ));
     645           0 :     fd_topo_tile_t * shred_tile = &config->topo.tiles[ shred_tile_idx ];
     646             : 
     647           0 :     ulong shred_out_in_idx = fd_topo_find_tile_in_link( &config->topo, shred_tile, "net_shred", i );
     648           0 :     if( FD_UNLIKELY( shred_out_in_idx == ULONG_MAX ) ) FD_LOG_ERR(( "net_shred link not found for shred tile 0" ));
     649           0 :     net_shred_links[i] = fd_metrics_link_in( shred_tile->metrics, shred_out_in_idx );
     650           0 :     FD_TEST( net_shred_links[i] );
     651           0 :   }
     652             : 
     653           0 :   FD_LOG_NOTICE(( "Repair catchup run" ));
     654             : 
     655           0 :   ulong    shred_out_link_idx = fd_topo_find_link( &config->topo, "shred_out", 0UL );
     656           0 :   FD_TEST( shred_out_link_idx!=ULONG_MAX );
     657           0 :   fd_topo_link_t * shred_out_link   = &config->topo.links[ shred_out_link_idx  ];
     658           0 :   fd_frag_meta_t * shred_out_mcache = shred_out_link->mcache;
     659             : 
     660           0 :   ulong turbine_slot0 = 0;
     661           0 :   long  last_print    = fd_log_wallclock();
     662           0 :   ulong last_sent     = 0UL;
     663             : 
     664           0 :   if( FD_LIKELY( args->repair.end_slot ) ) turbine_slot0 = args->repair.end_slot;
     665             : 
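                      :   /* Launch every tile in this process, then poll the shred/repair/link metrics
                      :      roughly once per second until the catchup target slot has been repaired. */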
     666           0 :   fd_topo_run_single_process( &config->topo, 0, config->uid, config->gid, fdctl_tile_run );
     667           0 :   for(;;) {
     668             : 
     669           0 :     if( FD_UNLIKELY( !turbine_slot0 ) ) {
     670           0 :       fd_frag_meta_t * frag = &shred_out_mcache[0]; /* hack to get first frag */
      671           0 :       if( frag->sz > 0 ) {
     672           0 :         turbine_slot0 = fd_disco_shred_out_shred_sig_slot( frag->sig );
     673           0 :         FD_LOG_NOTICE(("turbine_slot0: %lu", turbine_slot0));
     674           0 :       }
     675           0 :     }
     676             : 
     677             :     /* print metrics */
     678             : 
     679           0 :     long now = fd_log_wallclock();
     680           0 :     int catchup_finished = 0;
     681           0 :     if( FD_UNLIKELY( now - last_print > 1e9L ) ) {
     682           0 :       char buf2[ 64 ];
     683           0 :       ulong rcvd = shred_metrics [ MIDX( COUNTER, SHRED,  SHRED_REPAIR_RCV ) ];
     684           0 :       ulong sent = repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_WINDOW ) ] +
     685           0 :                    repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_HIGHEST_WINDOW ) ] +
     686           0 :                    repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_ORPHAN ) ];
     687           0 :       ulong sign_tile_unavail = repair_metrics[ MIDX( COUNTER, REPAIR, SIGN_TILE_UNAVAIL ) ];
     688           0 :       printf(" Requests received: (%lu/%lu) %.1f%% \n", rcvd, sent, (double)rcvd / (double)sent * 100.0 );
     689           0 :       printf( " +---------------+--------------+\n" );
     690           0 :       printf( " | Request Type  | Count        |\n" );
     691           0 :       printf( " +---------------+--------------+\n" );
     692           0 :       printf( " | Orphan        | %s |\n", fmt_count( buf2, repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_ORPHAN         ) ] ) );
     693           0 :       printf( " | HighestWindow | %s |\n", fmt_count( buf2, repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_HIGHEST_WINDOW ) ] ) );
     694           0 :       printf( " | Index         | %s |\n", fmt_count( buf2, repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_WINDOW         ) ] ) );
     695           0 :       printf( " +---------------+--------------+\n" );
     696           0 :       printf( " Send Pkt Rate: %s pps\n",  fmt_count( buf2, (ulong)((sent - last_sent)*1e9L / (now - last_print) ) ) );
     697           0 :       last_sent = sent;
     698             : 
     699             :       /* Sum overrun across all net tiles connected to repair_net */
     700           0 :       ulong total_overrun = repair_net_links[0][ MIDX( COUNTER, LINK, OVERRUN_POLLING_FRAG_COUNT ) ]; /* coarse double counting prevention */
     701           0 :       ulong total_consumed = 0UL;
     702           0 :       for( ulong i = 0UL; i < net_tile_cnt; i++ ) {
     703           0 :         volatile ulong * ovar_net_metrics = repair_net_links[i];
     704           0 :         total_overrun  += ovar_net_metrics[ MIDX( COUNTER, LINK, OVERRUN_READING_FRAG_COUNT ) ];
     705           0 :         total_consumed += ovar_net_metrics[ MIDX( COUNTER, LINK, CONSUMED_COUNT ) ]; /* consumed is incremented after after_frag is called */
     706           0 :       }
     707           0 :       printf( " Outgoing requests overrun:  %s\n", fmt_count( buf2, total_overrun  ) );
     708           0 :       printf( " Outgoing requests consumed: %s\n", fmt_count( buf2, total_consumed ) );
     709             : 
     710           0 :       total_overrun  = net_shred_links[0][ MIDX( COUNTER, LINK, OVERRUN_READING_FRAG_COUNT ) ];
     711           0 :       total_consumed = 0UL;
     712           0 :       for( ulong i = 0UL; i < net_tile_cnt; i++ ) {
     713           0 :         volatile ulong * ovar_net_metrics = net_shred_links[i];
     714           0 :         total_overrun  += ovar_net_metrics[ MIDX( COUNTER, LINK, OVERRUN_READING_FRAG_COUNT ) ];
     715           0 :         total_consumed += ovar_net_metrics[ MIDX( COUNTER, LINK, CONSUMED_COUNT ) ]; /* shred frag filtering happens manually in after_frag, so no need to index every shred_tile. */
     716           0 :       }
     717             : 
     718           0 :       printf( " Incoming shreds overrun:    %s\n", fmt_count( buf2, total_overrun ) );
     719           0 :       printf( " Incoming shreds consumed:   %s\n", fmt_count( buf2, total_consumed ) );
     720             : 
     721           0 :       print_histogram_buckets( repair_metrics,
     722           0 :                                MIDX( HISTOGRAM, REPAIR, RESPONSE_LATENCY ),
     723           0 :                                FD_METRICS_CONVERTER_NONE,
     724           0 :                                FD_METRICS_HISTOGRAM_REPAIR_RESPONSE_LATENCY_MIN,
     725           0 :                                FD_METRICS_HISTOGRAM_REPAIR_RESPONSE_LATENCY_MAX,
     726           0 :                                "Response Latency" );
     727             : 
     728           0 :       printf(" Sign tile unavailable: %lu\n", sign_tile_unavail);
     729           0 :       printf(" Repair Peers: %lu\n", repair_metrics[ MIDX( COUNTER, REPAIR, REQUEST_PEERS ) ] );
     730           0 :       ulong slots_behind = turbine_slot0 > repair_metrics[ MIDX( COUNTER, REPAIR, REPAIRED_SLOTS ) ] ? turbine_slot0 - repair_metrics[ MIDX( COUNTER, REPAIR, REPAIRED_SLOTS ) ] : 0;
     731           0 :       printf(" Repaired slots: %lu/%lu  (slots behind: %lu)\n", repair_metrics[ MIDX( COUNTER, REPAIR, REPAIRED_SLOTS ) ], turbine_slot0, slots_behind );
     732           0 :       if( turbine_slot0 && !slots_behind && ( !args->repair.end_slot || FD_VOLATILE_CONST( repair_ctx->profiler.complete ) ) ) {
     733           0 :         catchup_finished = 1;
     734           0 :       }
     735             :       /* Print histogram buckets similar to Prometheus format */
     736           0 :       print_histogram_buckets( repair_metrics,
     737           0 :                                MIDX( HISTOGRAM, REPAIR, SLOT_COMPLETE_TIME ),
     738           0 :                                FD_METRICS_CONVERTER_SECONDS,
     739           0 :                                FD_METRICS_HISTOGRAM_REPAIR_SLOT_COMPLETE_TIME_MIN,
     740           0 :                                FD_METRICS_HISTOGRAM_REPAIR_SLOT_COMPLETE_TIME_MAX,
     741           0 :                                "Slot Complete Time" );
     742             : 
     743           0 :       printf("\n");
     744           0 :       fflush( stdout );
     745           0 :       last_print = now;
     746           0 :     }
     747             : 
     748           0 :     if( FD_UNLIKELY( catchup_finished ) ) {
     749             :       /* repair cmd owned memory */
     750           0 :       location_table = fd_location_table_join( fd_location_table_new( location_table_mem ) );
     751           0 :       read_iptable( args->repair.iptable_path, location_table );
     752           0 :       print_peer_location_latency( repair_wksp->wksp, repair_ctx );
     753           0 :       print_catchup_slots( repair_wksp->wksp, repair_ctx, 0, 1 );
     754           0 :       FD_LOG_NOTICE(("Catchup to slot %lu completed successfully", turbine_slot0));
     755           0 :       fd_sys_util_exit_group( 0 );
     756           0 :     }
     757           0 :   }
     758           0 : }
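
/* Sketch (added): the send-packet rate above is obtained by differencing the
   cumulative sent counter between prints and scaling by the wallclock delta,
   which fd_log_wallclock() reports in nanoseconds.  The helper below is a
   minimal restatement of that pattern; the name and parameters are
   illustrative only and do not exist elsewhere in this file. */

static inline ulong
rate_per_sec( ulong counter_now, ulong counter_prev,
              long  ts_now_ns,   long  ts_prev_ns ) {
  long dt_ns = ts_now_ns - ts_prev_ns;
  if( dt_ns<=0L ) return 0UL; /* first sample, or clock went backwards */
  return (ulong)( (double)( counter_now - counter_prev )*1e9 / (double)dt_ns );
}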
     759             : 
     760             : static void
     761             : repair_cmd_fn_forest( args_t *   args,
     762           0 :                       config_t * config ) {
     763           0 :   ctx_t *          repair_ctx;
     764           0 :   fd_topo_wksp_t * repair_wksp;
     765           0 :   repair_ctx_wksp( args, config, &repair_ctx, &repair_wksp );
     766             : 
     767           0 :   ulong forest_gaddr = fd_wksp_gaddr_fast( repair_ctx->wksp, repair_ctx->forest );
     768           0 :   fd_forest_t * forest = (fd_forest_t *)fd_wksp_laddr( repair_wksp->wksp, forest_gaddr );
     769             : 
     770           0 :   for( ;; ) {
     771           0 :     fd_forest_print( forest );
     772           0 :     sleep( 1 );
     773           0 :   }
     774           0 : }
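
/* Sketch (added): the pointer-translation idiom used by this and the other
   inspection subcommands.  Pointers stored in the tile's ctx (such as
   repair_ctx->forest above) are only valid in the repair tile's own mapping
   of the workspace, so they are first converted to a workspace-relative
   gaddr against the tile's wksp base and then mapped back through this
   process's read-only join.  The helper name and parameters below are
   illustrative only. */

static void *
translate_tile_ptr( fd_wksp_t * tile_wksp,     /* wksp base as seen by the tile (repair_ctx->wksp) */
                    void *      tile_ptr,      /* pointer captured from the tile ctx               */
                    fd_wksp_t * local_wksp ) { /* this process's join of the workspace             */
  ulong gaddr = fd_wksp_gaddr_fast( tile_wksp, tile_ptr ); /* offset within the workspace */
  return fd_wksp_laddr( local_wksp, gaddr );               /* same object, local address  */
}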
     775             : 
     776             : static void
     777             : repair_cmd_fn_inflight( args_t *   args,
     778           0 :                         config_t * config ) {
     779           0 :   ctx_t *          repair_ctx;
     780           0 :   fd_topo_wksp_t * repair_wksp;
     781           0 :   repair_ctx_wksp( args, config, &repair_ctx, &repair_wksp );
     782             : 
     783           0 :   ulong            inflights_gaddr = fd_wksp_gaddr_fast( repair_ctx->wksp, repair_ctx->inflights );
     784           0 :   fd_inflights_t * inflights       = (fd_inflights_t *)fd_wksp_laddr( repair_wksp->wksp, inflights_gaddr );
     785             : 
     786           0 :   ulong inflight_map_off  = (ulong)inflights->map  - (ulong)repair_ctx->inflights;
     787           0 :   ulong inflight_pool_off = (ulong)inflights->pool - (ulong)repair_ctx->inflights;
     788             : 
     789           0 :   fd_inflight_map_t * inflight_map  = (fd_inflight_map_t *)fd_wksp_laddr( repair_wksp->wksp, inflights_gaddr + inflight_map_off  );
     790           0 :   fd_inflight_t *     inflight_pool = (fd_inflight_t *)    fd_wksp_laddr( repair_wksp->wksp, inflights_gaddr + inflight_pool_off );
     791             : 
     792           0 :   for( ;; ) {
     793           0 :     fd_inflights_print( inflight_map, inflight_pool );
     794           0 :     sleep( 1 );
     795           0 :   }
     796           0 : }
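
/* Sketch (added): translating a pointer nested inside a tile-owned object,
   as done above for inflights->map and inflights->pool.  The inner pointer
   is also tile-local, so its byte offset from the tile-local base of the
   parent object is computed first and then applied to the parent's gaddr in
   our own join.  Names below are illustrative only. */

static void *
translate_nested_ptr( void const * parent_tile_ptr, /* parent object, tile address space    */
                      void const * inner_tile_ptr,  /* member pointer, tile address space   */
                      ulong        parent_gaddr,    /* parent object's workspace gaddr      */
                      fd_wksp_t *  local_wksp ) {   /* this process's join of the workspace */
  ulong off = (ulong)inner_tile_ptr - (ulong)parent_tile_ptr;
  return fd_wksp_laddr( local_wksp, parent_gaddr + off );
}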
     797             : 
     798             : static void
     799             : repair_cmd_fn_requests( args_t *   args,
     800           0 :                         config_t * config ) {
     801           0 :   ctx_t *          repair_ctx;
     802           0 :   fd_topo_wksp_t * repair_wksp;
     803           0 :   repair_ctx_wksp( args, config, &repair_ctx, &repair_wksp );
     804             : 
     805           0 :   fd_forest_t *          forest = fd_forest_join( fd_wksp_laddr( repair_wksp->wksp, fd_wksp_gaddr_fast( repair_ctx->wksp, repair_ctx->forest ) ) );
     806           0 :   fd_forest_reqslist_t * dlist  = fd_forest_reqslist( forest );
     807           0 :   fd_forest_ref_t *      pool   = fd_forest_reqspool( forest );
     808             : 
     809           0 :   for( ;; ) {
     810           0 :     printf("%-15s %-12s %-12s %-12s %-20s %-12s %-10s\n",
     811           0 :             "Slot", "Consumed Idx", "Buffered Idx", "Complete Idx",
     812           0 :             "First Shred Timestamp", "Turbine Cnt", "Repair Cnt");
     813           0 :     printf("%-15s %-12s %-12s %-12s %-20s %-12s %-10s\n",
     814           0 :             "---------------", "------------", "------------", "------------",
     815           0 :             "--------------------", "------------", "----------");
     816           0 :     for( fd_forest_reqslist_iter_t iter = fd_forest_reqslist_iter_fwd_init( dlist, pool );
     817           0 :         !fd_forest_reqslist_iter_done( iter, dlist, pool );
     818           0 :         iter = fd_forest_reqslist_iter_fwd_next( iter, dlist, pool ) ) {
     819           0 :       fd_forest_ref_t * req = fd_forest_reqslist_iter_ele( iter, dlist, pool );
     820           0 :       fd_forest_blk_t * blk = fd_forest_pool_ele( fd_forest_pool( forest ), req->idx );
     821             : 
     822           0 :       printf("%-15lu %-12u %-12u %-12u %-20ld %-12u %-10u\n",
     823           0 :               blk->slot,
     824           0 :               blk->consumed_idx,
     825           0 :               blk->buffered_idx,
     826           0 :               blk->complete_idx,
     827           0 :               blk->first_shred_ts,
     828           0 :               blk->turbine_cnt,
     829           0 :               blk->repair_cnt);
     830           0 :     }
     831           0 :     printf("\n");
     832           0 :     sleep( 1 );
     833           0 :   }
     834           0 : }
     835             : 
     836             : static void
     837             : repair_cmd_fn_waterfall( args_t *   args,
     838           0 :                          config_t * config ) {
     839             : 
     840           0 :   fd_topo_t * topo    = &config->topo;
     841           0 :   ulong       wksp_id = fd_topo_find_wksp( topo, "repair" );
     842           0 :   if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair workspace not found" ));
     843           0 :   fd_topo_wksp_t * repair_wksp = &topo->workspaces[ wksp_id ];
     844           0 :   fd_topo_join_workspace( topo, repair_wksp, FD_SHMEM_JOIN_MODE_READ_ONLY );
     845             : 
     846             :   /* Access the repair tile scratch memory where repair_tile_ctx is stored */
     847           0 :   ulong tile_id = fd_topo_find_tile( topo, "repair", 0UL );
     848           0 :   if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair tile not found" ));
     849           0 :   fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
     850           0 :   void * scratch = fd_topo_obj_laddr( &config->topo, tile->tile_obj_id );
     851           0 :   if( FD_UNLIKELY( !scratch ) ) FD_LOG_ERR(( "Failed to access repair tile scratch memory" ));
     852             : 
     853           0 :   FD_SCRATCH_ALLOC_INIT( l, scratch );
     854           0 :   ctx_t * repair_ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
     855             : 
     856             :   /* repair cmd owned memory */
     857           0 :   location_table = fd_location_table_join( fd_location_table_new( location_table_mem ) );
     858           0 :   read_iptable( args->repair.iptable_path, location_table );
     859             : 
     860             :   /* Terminal setup, same as in monitor.c */
     861           0 :   atexit( restore_terminal );
     862           0 :   if( FD_UNLIKELY( 0!=tcgetattr( STDIN_FILENO, &termios_backup ) ) ) {
     863           0 :     FD_LOG_ERR(( "tcgetattr(STDIN_FILENO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     864           0 :   }
     865             : 
     866             :   /* Disable character echo and line buffering */
     867           0 :   struct termios term = termios_backup;
     868           0 :   term.c_lflag &= (tcflag_t)~(ICANON | ECHO);
     869           0 :   if( FD_UNLIKELY( 0!=tcsetattr( STDIN_FILENO, TCSANOW, &term ) ) ) {
     870           0 :     FD_LOG_WARNING(( "tcsetattr(STDIN_FILENO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     871           0 :   }
     872             : 
     873           0 :   int  catchup_verbose = 0;
     874           0 :   long last_print = 0;
     875           0 :   for( ;; ) {
     876           0 :     int c = fd_getchar();
     877           0 :     if( FD_UNLIKELY( c=='i'    ) ) catchup_verbose = !catchup_verbose;
     878           0 :     if( FD_UNLIKELY( c=='\x04' ) ) break; /* Ctrl-D */
     879             : 
     880           0 :     long now = fd_log_wallclock();
     881           0 :     if( FD_UNLIKELY( now - last_print > 1e9L ) ) {
     882           0 :       last_print = now;
     883           0 :       print_catchup_slots( repair_wksp->wksp, repair_ctx, catchup_verbose, args->repair.sort_by_slot );
     884           0 :       printf( "catchup slots | Use 'i' to toggle extra slot information" TEXT_NEWLINE );
     885           0 :       fflush( stdout );
     886             : 
     887             :       /* Peer location latency is not that useful post catchup, and also
     888             :          requires some concurrent dlist iteration, so only print it when
     889             :          in catchup mode. */
     890           0 :     }
     891           0 :   }
     892           0 : }
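
/* Sketch (added): the plain POSIX form of the terminal setup above.  Clearing
   ICANON disables line buffering so fd_getchar() sees key presses without a
   trailing Enter, and clearing ECHO keeps them from being printed; the saved
   settings are put back by the atexit() handler registered above.  The names
   below are illustrative only. */

#include <termios.h>
#include <unistd.h>

static struct termios saved_termios;

static void
raw_mode_restore( void ) {
  (void)tcsetattr( STDIN_FILENO, TCSANOW, &saved_termios );
}

static int
raw_mode_enable( void ) {
  if( 0!=tcgetattr( STDIN_FILENO, &saved_termios ) ) return -1;
  struct termios t = saved_termios;
  t.c_lflag &= (tcflag_t)~(ICANON | ECHO);
  return tcsetattr( STDIN_FILENO, TCSANOW, &t ); /* pair with atexit( raw_mode_restore ) */
}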
     893             : 
     894             : static void
     895             : repair_cmd_fn_peers( args_t *   args,
     896           0 :                      config_t * config ) {
     897           0 :   ctx_t *          repair_ctx;
     898           0 :   fd_topo_wksp_t * repair_wksp;
     899           0 :   repair_ctx_wksp( args, config, &repair_ctx, &repair_wksp );
     900             : 
     901           0 :   fd_policy_t * policy = fd_wksp_laddr( repair_wksp->wksp, fd_wksp_gaddr_fast( repair_ctx->wksp, repair_ctx->policy ) );
     902             : 
     903           0 :   fd_peer_dlist_t *  best_dlist  = fd_wksp_laddr( repair_wksp->wksp, fd_wksp_gaddr_fast( repair_ctx->wksp, policy->peers.fast ) );
     904           0 :   fd_peer_dlist_t *  worst_dlist = fd_wksp_laddr( repair_wksp->wksp, fd_wksp_gaddr_fast( repair_ctx->wksp, policy->peers.slow ) );
     905           0 :   fd_peer_t *        pool        = fd_wksp_laddr( repair_wksp->wksp, fd_wksp_gaddr_fast( repair_ctx->wksp, policy->peers.pool ) );
     906           0 :   fd_policy_peer_t * peers_map   = fd_wksp_laddr( repair_wksp->wksp, fd_wksp_gaddr_fast( repair_ctx->wksp, policy->peers.map ) );
     907             : 
     908             : 
     909           0 :   printf("FAST REPAIR PEERS (latency < 80ms)\n");
     910           0 :   int i = 1;
     911           0 :   for( fd_peer_dlist_iter_t iter = fd_peer_dlist_iter_fwd_init( best_dlist, pool );
     912           0 :         !fd_peer_dlist_iter_done( iter, best_dlist, pool );
     913           0 :         iter = fd_peer_dlist_iter_fwd_next( iter, best_dlist, pool ) ) {
     914           0 :       fd_peer_t * peer = fd_peer_dlist_iter_ele( iter, best_dlist, pool );
     915           0 :       FD_BASE58_ENCODE_32_BYTES( peer->identity.key, p );
     916           0 :       printf(" %d. %s\n", i, p );
     917           0 :       i++;
     918           0 :   }
     919             : 
     920           0 :   printf("SLOW REPAIR PEERS (latency > 80ms)\n");
     921           0 :   i = 1;
     922           0 :   for( fd_peer_dlist_iter_t iter = fd_peer_dlist_iter_fwd_init( worst_dlist, pool );
     923           0 :         !fd_peer_dlist_iter_done( iter, worst_dlist, pool );
     924           0 :         iter = fd_peer_dlist_iter_fwd_next( iter, worst_dlist, pool ) ) {
     925           0 :       fd_peer_t * peer = fd_peer_dlist_iter_ele( iter, worst_dlist, pool );
     926           0 :       FD_BASE58_ENCODE_32_BYTES( peer->identity.key, p );
     927           0 :       printf(" %d. %s\n", i, p);
     928           0 :       i++;
     929           0 :   }
     930             : 
     931           0 :   for ( ulong i = 0; i < fd_policy_peer_map_slot_cnt( peers_map ); i++ ) {
     932           0 :     fd_policy_peer_t * peer = &peers_map[ i ];
     933           0 :     FD_TEST( fd_peer_pool_idx_test( pool, peer->pool_idx ) );
     934           0 :   }
     935             : 
     936             :   /* Specific peer info
     937             :   fd_hash_t key = { .ul = {15638155844970609479UL, 7058397130238410853UL,
     938             :     953861879773611379UL, 1223280701789465918UL } };
     939             :   fd_policy_peer_t * peer = fd_policy_peer_map_query( peers_map, key, NULL );
     940             :   if( FD_LIKELY( peer ) ) {
     941             :     printf("Peer info:\n");
     942             :     printf("  Key: %s\n", FD_BASE58_ENC_32_ALLOCA( &peer->key ));
     943             :     printf("  Req Cnt: %lu\n", peer->req_cnt );
     944             :     printf("  Res Cnt: %lu\n", peer->res_cnt );
     945             :     printf("  First Req Ts: %ld\n", peer->first_req_ts );
     946             :     printf("  Last Req Ts: %ld\n",  peer->last_req_ts );
     947             :     printf("  Pool Index: %lu\n",   peer->pool_idx );
     948             :   } */
     949           0 : }
     950             : 
     951             : 
     952             : static const char * HELP =
     953             :   "\n\n"
     954             :   "usage: repair [-h] {catchup,forest,waterfall} ...\n"
     955             :   "\n"
     956             :   "positional arguments:\n"
     957             :   "  {catchup,forest,inflight,requests,waterfall}\n"
     958             :   "    catchup             runs Firedancer with a reduced topology that only repairs slots until catchup\n"
     959             :   "    forest              prints the repair forest\n"
     960             :   "    inflight            prints the inflight repairs\n"
     961             :   "    requests            prints the queued repair requests\n"
     962             :   "    waterfall           prints a waterfall diagram of recent slot completion times and response latencies\n"
     963             :   "    peers               prints list of slow and fast repair peers\n"
     964             :   "\n"
     965             :   "optional arguments:\n"
     966             :   "  -h, --help            show this help message and exit\n";
     967             : 
     968             : static const char * CATCHUP_HELP =
     969             :   "\n\n"
     970             :   "usage: repair catchup [-h] [--manifest-path MANIFEST_PATH] [--iptable-path IPTABLE_PATH] [--sort-by-slot]\n"
     971             :   "\n"
     972             :   "required arguments:\n"
     973             :   "  --manifest-path MANIFEST_PATH\n"
     974             :   "                        path to manifest file\n"
     975             :   "\n"
     976             :   "optional arguments:\n"
     977             :   "  -h, --help            show this help message and exit\n"
     978             :   "  --end-slot END_SLOT   slot to catchup to (generally should be a rooted slot)\n"
     979             :   "  --iptable-path IPTABLE_PATH\n"
     980             :   "                        path to iptable file\n"
     981             :   "  --sort-by-slot        sort results by slot\n";
     982             : 
     983             : static const char * FOREST_HELP =
     984             :   "\n\n"
     985             :   "usage: repair forest [-h]\n"
     986             :   "\n"
     987             :   "optional arguments:\n"
     988             :   "  -h, --help            show this help message and exit\n"
     989             :   "  --slot SLOT           specific forest slot to drill into\n";
     990             : 
     991             : static const char * INFLIGHT_HELP =
     992             :   "\n\n"
     993             :   "usage: repair inflight [-h]\n"
     994             :   "\n"
     995             :   "optional arguments:\n"
     996             :   "  -h, --help            show this help message and exit";
     997             : 
     998             : static const char * REQUESTS_HELP =
     999             :   "\n\n"
    1000             :   "usage: repair requests [-h]\n"
    1001             :   "\n"
    1002             :   "optional arguments:\n"
    1003             :   "  -h, --help            show this help message and exit\n";
    1004             : 
    1005             : static const char * WATERFALL_HELP =
    1006             :   "\n\n"
    1007             :   "usage: repair waterfall [-h] [--iptable IPTABLE_PATH] [--sort-by-slot]\n"
    1008             :   "\n"
    1009             :   "optional arguments:\n"
    1010             :   "  -h, --help            show this help message and exit\n"
    1011             :   "  --iptable IPTABLE_PATH\n"
    1012             :   "                        path to iptable file\n"
    1013             :   "  --sort-by-slot        sort results by slot\n";
    1014             : 
    1015             : static const char * PEERS_HELP =
    1016             :   "\n\n"
    1017             :   "usage: repair peers [-h]\n"
    1018             :   "\n"
    1019             :   "optional arguments:\n"
    1020             :   "  -h, --help            show this help message and exit\n";
    1021             : void
    1022           0 : repair_cmd_help( char const * arg ) {
    1023           0 :   if      ( FD_LIKELY( !arg                        ) ) FD_LOG_NOTICE(( "%s", HELP           ));
    1024           0 :   else if ( FD_LIKELY( !strcmp( arg, "catchup"   ) ) ) FD_LOG_NOTICE(( "%s", CATCHUP_HELP   ));
    1025           0 :   else if ( FD_LIKELY( !strcmp( arg, "forest"    ) ) ) FD_LOG_NOTICE(( "%s", FOREST_HELP    ));
    1026           0 :   else if ( FD_LIKELY( !strcmp( arg, "inflight"  ) ) ) FD_LOG_NOTICE(( "%s", INFLIGHT_HELP  ));
    1027           0 :   else if ( FD_LIKELY( !strcmp( arg, "requests"  ) ) ) FD_LOG_NOTICE(( "%s", REQUESTS_HELP  ));
    1028           0 :   else if ( FD_LIKELY( !strcmp( arg, "waterfall" ) ) ) FD_LOG_NOTICE(( "%s", WATERFALL_HELP ));
    1029           0 :   else if ( FD_LIKELY( !strcmp( arg, "peers"     ) ) ) FD_LOG_NOTICE(( "%s", PEERS_HELP     ));
    1030           0 :   else                                                 FD_LOG_NOTICE(( "%s", HELP           ));
    1031           0 : }
    1032             : 
    1033             : void
    1034             : repair_cmd_args( int *    pargc,
    1035             :                  char *** pargv,
    1036           0 :                  args_t * args ) {
    1037             : 
    1038             :   /* help */
    1039             : 
    1040           0 :   args->repair.help = fd_env_strip_cmdline_contains( pargc, pargv, "--help" );
    1041           0 :   args->repair.help = args->repair.help || fd_env_strip_cmdline_contains( pargc, pargv, "-h" );
    1042             : 
    1043             :   /* positional arg */
    1044             : 
    1045           0 :   args->repair.pos_arg = (*pargv)[0];
    1046           0 :   if( FD_UNLIKELY( !args->repair.pos_arg ) ) {
    1047           0 :     args->repair.help = 1;
    1048           0 :     return;
    1049           0 :   }
    1050             : 
    1051             :   /* required args */
    1052             : 
    1053           0 :   char const * manifest_path = fd_env_strip_cmdline_cstr    ( pargc, pargv, "--manifest-path", NULL, NULL      );
    1054             : 
    1055             :   /* optional args */
    1056             : 
    1057           0 :   char const * iptable_path  = fd_env_strip_cmdline_cstr    ( pargc, pargv, "--iptable",       NULL, NULL      );
    1058           0 :   ulong        slot          = fd_env_strip_cmdline_ulong   ( pargc, pargv, "--slot",          NULL, ULONG_MAX );
    1059           0 :   int          sort_by_slot  = fd_env_strip_cmdline_contains( pargc, pargv, "--sort-by-slot"                   );
    1060           0 :   ulong        end_slot      = fd_env_strip_cmdline_ulong   ( pargc, pargv, "--end-slot",      NULL, 0         );
    1061             : 
    1062           0 :   if( FD_UNLIKELY( !strcmp( args->repair.pos_arg, "catchup" ) && !manifest_path ) ) {
    1063           0 :     args->repair.help = 1;
    1064           0 :     return;
    1065           0 :   } else {
    1066           0 :     (*pargc)--;
    1067           0 :   }
    1068             : 
    1069           0 :   fd_cstr_fini( fd_cstr_append_cstr_safe( fd_cstr_init( args->repair.manifest_path ), manifest_path, sizeof(args->repair.manifest_path)-1UL ) );
    1070           0 :   fd_cstr_fini( fd_cstr_append_cstr_safe( fd_cstr_init( args->repair.iptable_path ),  iptable_path,  sizeof(args->repair.iptable_path )-1UL ) );
    1071           0 :   args->repair.slot         = slot;
    1072           0 :   args->repair.sort_by_slot = sort_by_slot;
    1073           0 :   args->repair.end_slot     = end_slot;
    1074           0 : }
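
/* Example (added): with the flags parsed above, a catchup run would be
   invoked along the lines of the following; the exact binary name depends on
   how this command is packaged and is not shown in this listing, and the
   values are placeholders.

     <firedancer-dev binary> repair catchup \
         --manifest-path <MANIFEST_PATH> --end-slot <END_SLOT> \
         --iptable <IPTABLE_PATH> --sort-by-slot

   Note that --end-slot defaults to 0, which the catchup loop treats as "no
   explicit end slot". */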
    1075             : 
    1076             : static void
    1077             : repair_cmd_fn( args_t *   args,
    1078           0 :                config_t * config ) {
    1079             : 
    1080           0 :   if( args->repair.help ) {
    1081           0 :     repair_cmd_help( args->repair.pos_arg );
    1082           0 :     return;
    1083           0 :   }
    1084             : 
    1085           0 :   if     ( !strcmp( args->repair.pos_arg, "catchup"   ) ) repair_cmd_fn_catchup  ( args, config );
    1086           0 :   else if( !strcmp( args->repair.pos_arg, "forest"    ) ) repair_cmd_fn_forest   ( args, config );
    1087           0 :   else if( !strcmp( args->repair.pos_arg, "inflight"  ) ) repair_cmd_fn_inflight ( args, config );
    1088           0 :   else if( !strcmp( args->repair.pos_arg, "requests"  ) ) repair_cmd_fn_requests ( args, config );
    1089           0 :   else if( !strcmp( args->repair.pos_arg, "waterfall" ) ) repair_cmd_fn_waterfall( args, config );
    1090           0 :   else if( !strcmp( args->repair.pos_arg, "peers"     ) ) repair_cmd_fn_peers    ( args, config );
    1091           0 :   else                                                    repair_cmd_help( NULL );
    1092           0 : }
    1093             : 
    1094             : action_t fd_action_repair = {
    1095             :   .name = "repair",
    1096             :   .args = repair_cmd_args,
    1097             :   .fn   = repair_cmd_fn,
    1098             :   .perm = dev_cmd_perm,
    1099             : };

Generated by: LCOV version 1.14