LCOV - code coverage report
Current view: top level - app/shared/commands/monitor - monitor.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 434 0.0 %
Date: 2025-12-07 04:58:33 Functions: 0 12 0.0 %

          Line data    Source code
       1             : #include "../../../../util/fd_util.h"
       2             : /* TODO: Layering violation */
       3             : #include "../../../shared_dev/commands/bench/bench.h"
       4             : 
       5             : #include "../../fd_config.h"
       6             : #include "../../../platform/fd_cap_chk.h"
       7             : #include "../../../../disco/topo/fd_topo.h"
       8             : #include "../../../../disco/metrics/fd_metrics.h"
       9             : 
      10             : #include "helper.h"
      11             : 
      12             : #include <unistd.h>
      13             : #include <errno.h>
      14             : #include <stdio.h>
      15             : #include <stdlib.h>
      16             : #include <signal.h>
      17             : #include <sys/syscall.h>
      18             : #include <sys/resource.h>
      19             : #include <linux/capability.h>
      20             : #include <sys/ioctl.h>
      21             : #include <termios.h>
      22             : #include "generated/monitor_seccomp.h"
      23             : 
      24             : void /* Fill args->monitor from the --dt-min, --dt-max, --duration, --seed, --bench and --sankey flags, then validate the timing values. */
      25             : monitor_cmd_args( int *    pargc, /* passed through to the fd_env_strip_cmdline_* helpers */
      26             :                   char *** pargv, /* passed through to the fd_env_strip_cmdline_* helpers */
      27           0 :                   args_t * args ) { /* only the args->monitor member is written here */
      28           0 :   args->monitor.drain_output_fd = -1; /* only accessible to development commands, not the command line */
      29           0 :   args->monitor.dt_min          = fd_env_strip_cmdline_long( pargc, pargv, "--dt-min",   NULL,    6666667.          ); /* default 6666667 (logged as ns by run_monitor) */
      30           0 :   args->monitor.dt_max          = fd_env_strip_cmdline_long( pargc, pargv, "--dt-max",   NULL,  133333333.          ); /* default 133333333 (logged as ns by run_monitor) */
      31           0 :   args->monitor.duration        = fd_env_strip_cmdline_long( pargc, pargv, "--duration", NULL,          0.          ); /* default 0, which run_monitor treats as "no deadline" */
      32           0 :   args->monitor.seed            = fd_env_strip_cmdline_uint( pargc, pargv, "--seed",     NULL, (uint)fd_tickcount() ); /* default seed is the truncated tick counter */
      33           0 :   args->monitor.ns_per_tic      = 1./fd_tempo_tick_per_ns( NULL ); /* calibrate during init */
      34             : 
      35           0 :   args->monitor.with_bench     = fd_env_strip_cmdline_contains( pargc, pargv, "--bench" ); /* presumably nonzero iff the flag is present - confirm against fd_env_strip_cmdline_contains */
      36           0 :   args->monitor.with_sankey    = fd_env_strip_cmdline_contains( pargc, pargv, "--sankey" );
      37             : 
      38           0 :   if( FD_UNLIKELY( args->monitor.dt_min<0L                   ) ) FD_LOG_ERR(( "--dt-min should be positive"          )); /* NOTE(review): the message says "positive" but 0 passes this check - confirm which is intended */
      39           0 :   if( FD_UNLIKELY( args->monitor.dt_max<args->monitor.dt_min ) ) FD_LOG_ERR(( "--dt-max should be at least --dt-min" )); /* keeps dt_max-dt_min non-negative for the randomized wait in run_monitor */
      40           0 :   if( FD_UNLIKELY( args->monitor.duration<0L                 ) ) FD_LOG_ERR(( "--duration should be non-negative"    ));
      41           0 : }
      42             : 
      43             : void /* Register with chk the resource limit and capabilities the monitor command needs; each request carries a human-readable reason string. */
      44             : monitor_cmd_perm( args_t *         args FD_PARAM_UNUSED, /* not used by this command */
      45             :                   fd_cap_chk_t *   chk, /* receives every requirement recorded below */
      46           0 :                   config_t const * config ) { /* supplies the topology and the target uid / gid */
      47           0 :   ulong mlock_limit = fd_topo_mlock( &config->topo ); /* value requested for RLIMIT_MEMLOCK below */
      48             : 
      49           0 :   fd_cap_chk_raise_rlimit( chk, "monitor", RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" );
      50             : 
      51           0 :   if( fd_sandbox_requires_cap_sys_admin( config->uid, config->gid ) ) /* CAP_SYS_ADMIN is only requested when the sandbox helper says it is needed */
      52           0 :     fd_cap_chk_cap( chk, "monitor", CAP_SYS_ADMIN,               "call `unshare(2)` with `CLONE_NEWUSER` to sandbox the process in a user namespace" );
      53           0 :   if( FD_LIKELY( getuid() != config->uid ) ) /* switching uid is only needed when not already running as the configured user */
      54           0 :     fd_cap_chk_cap( chk, "monitor", CAP_SETUID,                  "call `setresuid(2)` to switch uid to the sandbox user" );
      55           0 :   if( FD_LIKELY( getgid() != config->gid ) ) /* likewise for the group id */
      56           0 :     fd_cap_chk_cap( chk, "monitor", CAP_SETGID,                  "call `setresgid(2)` to switch gid to the sandbox user" );
      57           0 : }
      58             : 
      59             : typedef struct { /* One diagnostic sample of a single tile, filled in by tile_snap */
      60             :   ulong pid; /* TILE PID gauge */
      61             :   ulong heartbeat; /* TILE HEARTBEAT gauge; run_monitor subtracts it from a tick count to report staleness */
      62             :   ulong status; /* TILE STATUS gauge; run_monitor skips tiles whose status is 2 (stopped) */
      63             : 
      64             :   ulong in_backp; /* TILE IN_BACKPRESSURE gauge */
      65             :   ulong backp_cnt; /* TILE BACKPRESSURE_COUNT counter */
      66             : 
      67             :   ulong nvcsw; /* TILE CONTEXT_SWITCH_VOLUNTARY_COUNT counter */
      68             :   ulong nivcsw; /* TILE CONTEXT_SWITCH_INVOLUNTARY_COUNT counter */
      69             : 
      70             :   ulong regime_ticks[9]; /* nine consecutive REGIME_DURATION counters; run_monitor groups 0-2 as housekeeping, 3 and 6 as wait, 5 as backpressure, 4 and 7 as processing */
      71             : } tile_snap_t;
      72             : 
      73             : typedef struct { /* One diagnostic sample of a single (consumer tile, in link) pair, filled in by link_snap */
      74             :   ulong mcache_seq; /* sequence number queried from the link's mcache */
      75             : 
      76             :   ulong fseq_seq; /* sequence number queried from the consumer's fseq for this in link */
      77             : 
      78             :   ulong fseq_diag_tot_cnt; /* consumed count plus filtered count (link_snap adds the filtered value in) */
      79             :   ulong fseq_diag_tot_sz; /* consumed bytes plus filtered bytes (link_snap adds the filtered value in) */
      80             :   ulong fseq_diag_filt_cnt; /* LINK FILTERED_COUNT counter */
      81             :   ulong fseq_diag_filt_sz; /* LINK FILTERED_SIZE_BYTES counter */
      82             :   ulong fseq_diag_ovrnp_cnt; /* LINK OVERRUN_POLLING_COUNT counter */
      83             :   ulong fseq_diag_ovrnr_cnt; /* LINK OVERRUN_READING_COUNT counter */
      84             :   ulong fseq_diag_slow_cnt; /* producer-side LINK SLOW_COUNT counter; 0 when the in link is not reliable */
      85             : } link_snap_t;
      86             : 
      87             : static ulong /* Returns the sum of all nine regime_ticks entries of snap. */
      88           0 : tile_total_ticks( tile_snap_t * snap ) { /* snap is only read */
      89           0 :   ulong total = 0UL;
      90           0 :   for( ulong i=0UL; i<9UL; i++ ) total += snap->regime_ticks[ i ]; /* 9 must match the regime_ticks array length in tile_snap_t */
      91           0 :   return total;
      92           0 : }
      93             : 
      94             : static void /* Samples the metrics of every tile in topo into snap_cur, one tile_snap_t per tile. */
      95             : tile_snap( tile_snap_t *     snap_cur, /* Snapshot for each tile, indexed [0,tile_cnt) */
      96           0 :            fd_topo_t const * topo ) { /* topology whose tiles are sampled; only read */
      97           0 :   for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
      98           0 :     tile_snap_t * snap = &snap_cur[ tile_idx ];
      99             : 
     100           0 :     fd_topo_tile_t const * tile = &topo->tiles[ tile_idx ];
     101           0 :     snap->heartbeat = fd_metrics_tile( tile->metrics )[ FD_METRICS_GAUGE_TILE_HEARTBEAT_OFF ]; /* heartbeat and status are read straight from the tile's metrics region */
     102           0 :     snap->status    = fd_metrics_tile( tile->metrics )[ FD_METRICS_GAUGE_TILE_STATUS_OFF    ];
     103             : 
     104           0 :     fd_metrics_register( tile->metrics ); /* presumably points fd_metrics_tl at this tile so the FD_MGAUGE_GET / FD_MCNT_GET reads below target it - confirm against fd_metrics.h */
     105             : 
     106           0 :     FD_COMPILER_MFENCE(); /* the remaining reads are bracketed by a pair of compiler fences */
     107           0 :     snap->pid       = FD_MGAUGE_GET( TILE, PID );
     108           0 :     snap->nvcsw     = FD_MCNT_GET( TILE, CONTEXT_SWITCH_VOLUNTARY_COUNT );
     109           0 :     snap->nivcsw    = FD_MCNT_GET( TILE, CONTEXT_SWITCH_INVOLUNTARY_COUNT );
     110           0 :     snap->in_backp  = FD_MGAUGE_GET( TILE, IN_BACKPRESSURE );
     111           0 :     snap->backp_cnt = FD_MCNT_GET( TILE, BACKPRESSURE_COUNT );
     112           0 :     for( ulong i=0UL; i<9UL; i++ ) { /* 9 must match the regime_ticks array length in tile_snap_t */
     113           0 :       snap->regime_ticks[ i ] = fd_metrics_tl[ MIDX(COUNTER, TILE, REGIME_DURATION_NANOS)+i ]; /* NOTE(review): the metric is named ..._NANOS but is stored and displayed as ticks - confirm the unit */
     114           0 :     }
     115           0 :     FD_COMPILER_MFENCE();
     116           0 :   }
     117           0 : }
     118             : 
     119             : static ulong /* Returns the position of (consumer, consumer_in_idx) among the reliable consumers of producer's out links, or ULONG_MAX if it is not among them. */
     120             : find_producer_out_idx( fd_topo_t const *      topo, /* topology that is scanned tile by tile */
     121             :                        fd_topo_tile_t const * producer, /* tile whose out links are matched against */
     122             :                        fd_topo_tile_t const * consumer, /* tile being looked for; compared by address, so it must point into topo->tiles */
     123           0 :                        ulong                  consumer_in_idx ) { /* index into consumer's in links */
     124             :   /* This finds all reliable consumers of the producers primary output,
     125             :      and then returns the position of the consumer (specified by tile
     126             :      and index of the in of that tile) in that list. The list ordering
     127             :      is not important, except that it matches the ordering of fseqs
     128             :      provided to fd_stem, so that metrics written for each link index
     129             :      are retrieved at the same index here.
     130             : 
     131             :      This is why we only count reliable links, because fd_stem only
     132             :      looks at and writes producer side diagnostics (is the link slow)
     133             :      for reliable links. */
     134             : 
     135           0 :   ulong reliable_cons_cnt = 0UL; /* number of reliable matches seen so far, i.e. the index of the next match */
     136           0 :   for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     137           0 :     fd_topo_tile_t const * consumer_tile = &topo->tiles[ i ];
     138           0 :     for( ulong j=0UL; j<consumer_tile->in_cnt; j++ ) {
     139           0 :       for( ulong k=0UL; k<producer->out_cnt; k++ ) { /* every out link of producer is considered, not only the first */
     140           0 :         if( FD_UNLIKELY( consumer_tile->in_link_id[ j ]==producer->out_link_id[ k ] && consumer_tile->in_link_reliable[ j ] ) ) {
     141           0 :           if( FD_UNLIKELY( consumer==consumer_tile && consumer_in_idx==j ) ) return reliable_cons_cnt; /* found: report how many reliable matches preceded it */
     142           0 :           reliable_cons_cnt++;
     143           0 :         }
     144           0 :       }
     145           0 :     }
     146           0 :   }
     147             : 
     148           0 :   return ULONG_MAX; /* the requested in link is not a reliable consumer of any of producer's out links */
     149           0 : }
     150             : 
     151             : static void /* Samples every (tile, in link) pair of topo into snap_cur, in tile order and then in-link order. */
     152             : link_snap( link_snap_t *     snap_cur, /* output array; needs one entry per in link summed over all tiles */
     153           0 :            fd_topo_t const * topo ) { /* topology whose links are sampled; only read */
     154           0 :   ulong link_idx = 0UL; /* running index into snap_cur across all tiles */
     155           0 :   for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
     156           0 :     for( ulong in_idx=0UL; in_idx<topo->tiles[ tile_idx ].in_cnt; in_idx++ ) {
     157           0 :       link_snap_t * snap = &snap_cur[ link_idx ];
     158           0 :       fd_frag_meta_t const * mcache = topo->links[ topo->tiles[ tile_idx ].in_link_id[ in_idx  ] ].mcache;
     159           0 :       ulong const * seq = (ulong const *)fd_mcache_seq_laddr_const( mcache );
     160           0 :       snap->mcache_seq = fd_mcache_seq_query( seq ); /* producer-side sequence of the link */
     161             : 
     162           0 :       ulong const * fseq = topo->tiles[ tile_idx ].in_link_fseq[ in_idx ];
     163           0 :       snap->fseq_seq = fd_fseq_query( fseq ); /* consumer-side sequence of the link */
     164             : 
     165           0 :       ulong const * in_metrics = NULL; /* stays NULL for in links that are not polled; those report zeros below */
     166           0 :       if( FD_LIKELY( topo->tiles[ tile_idx ].in_link_poll[ in_idx ] ) ) {
     167           0 :         in_metrics = (ulong const *)fd_metrics_link_in( topo->tiles[ tile_idx ].metrics, in_idx );
     168           0 :       }
     169             : 
     170           0 :       fd_topo_link_t const * link = &topo->links[ topo->tiles[ tile_idx ].in_link_id[ in_idx ] ]; /* same link whose mcache was looked up above */
     171           0 :       ulong producer_id = fd_topo_find_link_producer( topo, link );
     172           0 :       FD_TEST( producer_id!=ULONG_MAX ); /* every in link is required to have a producer */
     173           0 :       volatile ulong const * out_metrics = NULL; /* stays NULL for unreliable in links; those report a slow count of zero below */
     174           0 :       if( FD_LIKELY( topo->tiles[ tile_idx ].in_link_reliable[ in_idx ] ) ) {
     175           0 :         fd_topo_tile_t const * producer = &topo->tiles[ producer_id ];
     176           0 :         ulong cons_idx = find_producer_out_idx( topo, producer, &topo->tiles[ tile_idx ], in_idx ); /* NOTE(review): not checked against ULONG_MAX; should be unreachable for a reliable in link of this producer - confirm */
     177             : 
     178           0 :         out_metrics = fd_metrics_link_out( producer->metrics, cons_idx );
     179           0 :       }
     180           0 :       FD_COMPILER_MFENCE(); /* the metric reads are bracketed by a pair of compiler fences */
     181           0 :       if( FD_LIKELY( in_metrics ) ) {
     182           0 :         snap->fseq_diag_tot_cnt   = in_metrics[ FD_METRICS_COUNTER_LINK_CONSUMED_COUNT_OFF ];
     183           0 :         snap->fseq_diag_tot_sz    = in_metrics[ FD_METRICS_COUNTER_LINK_CONSUMED_SIZE_BYTES_OFF ];
     184           0 :         snap->fseq_diag_filt_cnt  = in_metrics[ FD_METRICS_COUNTER_LINK_FILTERED_COUNT_OFF ];
     185           0 :         snap->fseq_diag_filt_sz   = in_metrics[ FD_METRICS_COUNTER_LINK_FILTERED_SIZE_BYTES_OFF ];
     186           0 :         snap->fseq_diag_ovrnp_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_OVERRUN_POLLING_COUNT_OFF ];
     187           0 :         snap->fseq_diag_ovrnr_cnt = in_metrics[ FD_METRICS_COUNTER_LINK_OVERRUN_READING_COUNT_OFF ];
     188           0 :       } else {
     189           0 :         snap->fseq_diag_tot_cnt   = 0UL;
     190           0 :         snap->fseq_diag_tot_sz    = 0UL;
     191           0 :         snap->fseq_diag_filt_cnt  = 0UL;
     192           0 :         snap->fseq_diag_filt_sz   = 0UL;
     193           0 :         snap->fseq_diag_ovrnp_cnt = 0UL;
     194           0 :         snap->fseq_diag_ovrnr_cnt = 0UL;
     195           0 :       }
     196             : 
     197           0 :       if( FD_LIKELY( out_metrics ) )
     198           0 :         snap->fseq_diag_slow_cnt  = out_metrics[ FD_METRICS_COUNTER_LINK_SLOW_COUNT_OFF ];
     199           0 :       else
     200           0 :         snap->fseq_diag_slow_cnt  = 0UL;
     201           0 :       FD_COMPILER_MFENCE();
     202           0 :       snap->fseq_diag_tot_cnt += snap->fseq_diag_filt_cnt; /* totals are reported as consumed plus filtered */
     203           0 :       snap->fseq_diag_tot_sz  += snap->fseq_diag_filt_sz;
     204           0 :       link_idx++;
     205           0 :     }
     206           0 :   }
     207           0 : }
     208             : 
     209             : /**********************************************************************/
     210             : 
     211           0 : static void write_stdout( char * buf, ulong buf_sz ) { /* Writes all buf_sz bytes of buf to standard output, looping over short writes. */
     212           0 :   ulong written = 0; /* bytes successfully written so far */
     213           0 :   ulong total = buf_sz;
     214           0 :   while( written < total ) {
     215           0 :     long n = write( STDOUT_FILENO, buf + written, total - written );
     216           0 :     if( FD_UNLIKELY( n < 0 ) ) {
     217           0 :       if( errno == EINTR ) continue; /* interrupted before writing anything: retry the same range */
     218           0 :       FD_LOG_ERR(( "error writing to stdout (%i-%s)", errno, fd_io_strerror( errno ) )); /* presumably does not return; otherwise the negative n would be added below - confirm against fd_log */
     219           0 :     }
     220           0 :     written += (ulong)n;
     221           0 :   }
     222           0 : }
     223             : 
     224             : static int stop1 = 0; /* NOTE(review): not referenced anywhere in the visible part of this file - confirm whether it is still used */
     225             : 
     226           0 : #define FD_MONITOR_TEXT_BUF_SZ 131072 /* size in bytes (128 KiB) of each of the two static text buffers below */
     227             : static char buffer[ FD_MONITOR_TEXT_BUF_SZ ]; /* output staging buffer: run_monitor formats a frame into it and it is flushed with write_stdout */
     228             : static char buffer2[ FD_MONITOR_TEXT_BUF_SZ ]; /* scratch buffer that drain_to_buffer reads raw bytes into */
     229             : 
     230             : static void /* Reads everything currently available on fd and appends each complete line to the output buffer, replacing its '\n' with TEXT_NEWLINE. */
     231             : drain_to_buffer( char ** buf, /* in/out: write cursor into the static `buffer`; reset to its start whenever the buffer is flushed */
     232             :                  ulong * buf_sz, /* in/out: bytes still free after *buf */
     233           0 :                  int fd ) { /* descriptor to drain; the EAGAIN handling below assumes it is non-blocking - confirm with the caller */
     234           0 :   while(1) {
     235           0 :     long nread = read( fd, buffer2, *buf_sz ); /* never asks for more than the free space, so one chunk always fits after a flush */
     236           0 :     if( FD_LIKELY( nread == 0 || ( nread == -1 && errno == EAGAIN ) ) ) break; /* end of file or no data available; without the EOF case a closed writer (read keeps returning 0) would spin here forever */
     237           0 :     else if( FD_UNLIKELY( nread == -1 ) ) FD_LOG_ERR(( "read() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     238             : 
     239           0 :     char * ptr = buffer2; /* start of the line currently being copied */
     240           0 :     char * next; /* position of that line's terminating '\n' */
     241           0 :     while(( next = memchr( ptr, '\n', (ulong)nread - (ulong)(ptr - buffer2) ))) { /* NOTE(review): bytes after the last '\n' of a chunk are not copied and are overwritten by the next read */
     242           0 :       ulong len = (ulong)(next - ptr); /* line length without its '\n' */
     243           0 :       if( FD_UNLIKELY( *buf_sz < len ) ) { /* line does not fit: flush what has accumulated and start over at the front of buffer */
     244           0 :         write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - *buf_sz );
     245           0 :         *buf = buffer;
     246           0 :         *buf_sz = FD_MONITOR_TEXT_BUF_SZ;
     247           0 :       }
     248           0 :       fd_memcpy( *buf, ptr, len );
     249           0 :       *buf += len;
     250           0 :       *buf_sz -= len;
     251             : 
     252           0 :       if( FD_UNLIKELY( *buf_sz < sizeof(TEXT_NEWLINE)-1 ) ) { /* same flush-and-reset when the line terminator does not fit */
     253           0 :         write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - *buf_sz );
     254           0 :         *buf = buffer;
     255           0 :         *buf_sz = FD_MONITOR_TEXT_BUF_SZ;
     256           0 :       }
     257           0 :       fd_memcpy( *buf, TEXT_NEWLINE, sizeof(TEXT_NEWLINE)-1 ); /* sizeof-1 skips the string literal's terminating NUL */
     258           0 :       *buf += sizeof(TEXT_NEWLINE)-1;
     259           0 :       *buf_sz -= sizeof(TEXT_NEWLINE)-1;
     260             : 
     261           0 :       ptr = next + 1; /* continue just past the '\n' */
     262           0 :     }
     263           0 :   }
     264           0 : }
     265             : 
     266             : static struct termios termios_backup; /* terminal attributes of stdin as captured by run_monitor with TCGETS */
     267             : 
     268             : static void /* Puts stdin's terminal attributes back to termios_backup; run_monitor registers this with atexit. */
     269           0 : restore_terminal( void ) {
     270           0 :   (void)ioctl( STDIN_FILENO, TCSETS, &termios_backup ); /* best effort: the result is deliberately ignored */
     271           0 : }
     272             : 
     273             : static void
     274             : run_monitor( config_t const * config,
     275             :              int              drain_output_fd,
     276             :              int              with_sankey,
     277             :              long             dt_min,
     278             :              long             dt_max,
     279             :              long             duration,
     280             :              uint             seed,
     281           0 :              double           ns_per_tic ) {
     282           0 :   fd_topo_t const * topo = &config->topo;
     283             : 
     284             :   /* Setup local objects used by this app */
     285           0 :   fd_rng_t _rng[1];
     286           0 :   fd_rng_t * rng = fd_rng_join( fd_rng_new( _rng, seed, 0UL ) );
     287             : 
     288           0 :   tile_snap_t * tile_snap_prv = (tile_snap_t *)fd_alloca( alignof(tile_snap_t), sizeof(tile_snap_t)*2UL*topo->tile_cnt );
     289           0 :   if( FD_UNLIKELY( !tile_snap_prv ) ) FD_LOG_ERR(( "fd_alloca failed" )); /* Paranoia */
     290           0 :   tile_snap_t * tile_snap_cur = tile_snap_prv + topo->tile_cnt;
     291             : 
     292           0 :   ulong link_cnt = 0UL;
     293           0 :   for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) link_cnt += topo->tiles[ tile_idx ].in_cnt;
     294           0 :   link_snap_t * link_snap_prv = (link_snap_t *)fd_alloca( alignof(link_snap_t), sizeof(link_snap_t)*2UL*link_cnt );
     295           0 :   if( FD_UNLIKELY( !link_snap_prv ) ) FD_LOG_ERR(( "fd_alloca failed" )); /* Paranoia */
     296           0 :   link_snap_t * link_snap_cur = link_snap_prv + link_cnt;
     297             : 
     298             :   /* Get the initial reference diagnostic snapshot */
     299           0 :   tile_snap( tile_snap_prv, topo );
     300           0 :   link_snap( link_snap_prv, topo );
     301           0 :   long then; long tic; fd_tempo_observe_pair( &then, &tic );
     302             : 
     303             :   /* Monitor for duration ns.  Note that for duration==0, this
     304             :      will still do exactly one pretty print. */
     305           0 :   FD_LOG_NOTICE(( "monitoring --dt-min %li ns, --dt-max %li ns, --duration %li ns, --seed %u", dt_min, dt_max, duration, seed ));
     306             : 
     307           0 :   long stop = then + duration;
     308           0 :   if( duration == 0 ) stop = LONG_MAX;
     309             : 
     310           0 : #define PRINT( ... ) do {                                                       \
     311           0 :     int n = snprintf( buf, buf_sz, __VA_ARGS__ );                               \
     312           0 :     if( FD_UNLIKELY( n<0 ) ) FD_LOG_ERR(( "snprintf failed" ));                 \
     313           0 :     if( FD_UNLIKELY( (ulong)n>=buf_sz ) ) FD_LOG_ERR(( "snprintf truncated" )); \
     314           0 :     buf += n; buf_sz -= (ulong)n;                                               \
     315           0 :   } while(0)
     316           0 :   int monitor_pane = 0;
     317             : 
     318             :   /* Restore original terminal attributes at exit */
     319           0 :   atexit( restore_terminal );
     320           0 :   if( FD_UNLIKELY( ioctl( STDIN_FILENO, TCGETS, &termios_backup ) ) ) {
     321           0 :     FD_LOG_ERR(( "ioctl(STDIN_FILENO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     322           0 :   }
     323             : 
     324             :   /* Disable character echo and line buffering */
     325           0 :   struct termios term = termios_backup;
     326           0 :   term.c_lflag &= (tcflag_t)~(ICANON | ECHO);
     327           0 :   if( FD_UNLIKELY( ioctl( STDIN_FILENO, TCSETS, &term ) ) ) {
     328           0 :     FD_LOG_WARNING(( "ioctl(STDIN_FILENO) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     329           0 :   }
     330             : 
     331           0 :   for(;;) {
     332             :     /* Wait a somewhat randomized amount and then make a diagnostic
     333             :        snapshot */
     334           0 :     fd_log_wait_until( then + dt_min + (long)fd_rng_ulong_roll( rng, 1UL+(ulong)(dt_max-dt_min) ) );
     335             : 
     336           0 :     tile_snap( tile_snap_cur, topo );
     337           0 :     link_snap( link_snap_cur, topo );
     338           0 :     long now; long toc; fd_tempo_observe_pair( &now, &toc );
     339             : 
     340             :     /* Pretty print a comparison between this diagnostic snapshot and
     341             :        the previous one. */
     342             : 
     343           0 :     char * buf = buffer;
     344           0 :     ulong buf_sz = FD_MONITOR_TEXT_BUF_SZ;
     345             : 
     346           0 :     PRINT( "\033[2J\033[H" );
     347             : 
     348             :     /* drain any firedancer log messages into the terminal */
     349           0 :     if( FD_UNLIKELY( drain_output_fd >= 0 ) ) drain_to_buffer( &buf, &buf_sz, drain_output_fd );
     350           0 :     if( FD_UNLIKELY( buf_sz < FD_MONITOR_TEXT_BUF_SZ / 2 ) ) {
     351             :       /* make sure there's enough space to print the whole monitor in one go */
     352           0 :       write_stdout( buffer, FD_MONITOR_TEXT_BUF_SZ - buf_sz );
     353           0 :       buf = buffer;
     354           0 :       buf_sz = FD_MONITOR_TEXT_BUF_SZ;
     355           0 :     }
     356             : 
     357           0 :     if( FD_UNLIKELY( drain_output_fd >= 0 ) ) PRINT( TEXT_NEWLINE );
     358           0 :     int c = fd_getchar();
     359           0 :     if( FD_UNLIKELY( c=='\t'   ) ) monitor_pane = !monitor_pane;
     360           0 :     if( FD_UNLIKELY( c=='\x04' ) ) break; /* Ctrl-D */
     361             : 
     362           0 :     long dt = now-then;
     363             : 
     364           0 :     char now_cstr[ FD_LOG_WALLCLOCK_CSTR_BUF_SZ ];
     365           0 :     if( !monitor_pane ) {
     366           0 :       PRINT( "snapshot for %s | Use TAB to switch panes" TEXT_NEWLINE, fd_log_wallclock_cstr( now, now_cstr ) );
     367           0 :       PRINT( "    tile |     pid |      stale | heart | nivcsw              | nvcsw               | in backp |           backp cnt |  %% hkeep |  %% wait  |  %% backp | %% finish" TEXT_NEWLINE );
     368           0 :       PRINT( "---------+---------+------------+-------+---------------------+---------------------+----------+---------------------+----------+----------+----------+----------" TEXT_NEWLINE );
     369           0 :       for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
     370           0 :         tile_snap_t * prv = &tile_snap_prv[ tile_idx ];
     371           0 :         tile_snap_t * cur = &tile_snap_cur[ tile_idx ];
     372           0 :         if( cur->status==2UL ) continue; /* stopped tile */
     373           0 :         PRINT( " %7s", topo->tiles[ tile_idx ].name );
     374           0 :         PRINT( " | %7lu", cur->pid );
     375           0 :         PRINT( " | " ); printf_stale   ( &buf, &buf_sz, (long)(0.5+ns_per_tic*(double)(toc - (long)cur->heartbeat)), 1e8 /* 100 millis */ );
     376           0 :         PRINT( " | " ); printf_heart   ( &buf, &buf_sz, (long)cur->heartbeat, (long)prv->heartbeat  );
     377           0 :         PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->nivcsw,          prv->nivcsw );
     378           0 :         PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->nvcsw,           prv->nvcsw  );
     379           0 :         PRINT( " | " ); printf_err_bool( &buf, &buf_sz, cur->in_backp,        prv->in_backp   );
     380           0 :         PRINT( " | " ); printf_err_cnt ( &buf, &buf_sz, cur->backp_cnt,       prv->backp_cnt  );
     381             : 
     382           0 :         ulong cur_hkeep_ticks      = cur->regime_ticks[0]+cur->regime_ticks[1]+cur->regime_ticks[2];
     383           0 :         ulong prv_hkeep_ticks      = prv->regime_ticks[0]+prv->regime_ticks[1]+prv->regime_ticks[2];
     384             : 
     385           0 :         ulong cur_wait_ticks       = cur->regime_ticks[3]+cur->regime_ticks[6];
     386           0 :         ulong prv_wait_ticks       = prv->regime_ticks[3]+prv->regime_ticks[6];
     387             : 
     388           0 :         ulong cur_backp_ticks      = cur->regime_ticks[5];
     389           0 :         ulong prv_backp_ticks      = prv->regime_ticks[5];
     390             : 
     391           0 :         ulong cur_processing_ticks = cur->regime_ticks[4]+cur->regime_ticks[7];
     392           0 :         ulong prv_processing_ticks = prv->regime_ticks[4]+prv->regime_ticks[7];
     393             : 
     394           0 :         PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_hkeep_ticks,      prv_hkeep_ticks,      0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
     395           0 :         PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_wait_ticks,       prv_wait_ticks,       0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
     396           0 :         PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_backp_ticks,      prv_backp_ticks,      0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
     397           0 :         PRINT( " | " ); printf_pct( &buf, &buf_sz, cur_processing_ticks, prv_processing_ticks, 0., tile_total_ticks( cur ), tile_total_ticks( prv ), DBL_MIN );
     398           0 :         PRINT( TEXT_NEWLINE );
     399           0 :       }
     400           0 :     } else {
     401           0 :       PRINT( "             link |  tot TPS |  tot bps | uniq TPS | uniq bps |   ha tr%% | uniq bw%% | filt tr%% | filt bw%% |           ovrnp cnt |           ovrnr cnt |            slow cnt |             tx seq" TEXT_NEWLINE );
     402           0 :       PRINT( "------------------+----------+----------+----------+----------+----------+----------+----------+----------+---------------------+---------------------+---------------------+-------------------" TEXT_NEWLINE );
     403             : 
     404           0 :       ulong link_idx = 0UL;
     405           0 :       for( ulong tile_idx=0UL; tile_idx<topo->tile_cnt; tile_idx++ ) {
     406           0 :         for( ulong in_idx=0UL; in_idx<topo->tiles[ tile_idx ].in_cnt; in_idx++ ) {
     407           0 :           link_snap_t * prv = &link_snap_prv[ link_idx ];
     408           0 :           link_snap_t * cur = &link_snap_cur[ link_idx ];
     409             : 
     410           0 :           fd_topo_link_t link = topo->links[ topo->tiles[ tile_idx ].in_link_id[ in_idx ] ];
     411           0 :           ulong producer_tile_id = fd_topo_find_link_producer( topo, &link );
     412           0 :           FD_TEST( producer_tile_id != ULONG_MAX );
     413           0 :           char const * producer = topo->tiles[ producer_tile_id ].name;
     414           0 :           PRINT( " %7s->%-7s", producer, topo->tiles[ tile_idx ].name );
     415           0 :           ulong cur_raw_cnt = /* cur->cnc_diag_ha_filt_cnt + */ cur->fseq_diag_tot_cnt;
     416           0 :           ulong cur_raw_sz  = /* cur->cnc_diag_ha_filt_sz  + */ cur->fseq_diag_tot_sz;
     417           0 :           ulong prv_raw_cnt = /* prv->cnc_diag_ha_filt_cnt + */ prv->fseq_diag_tot_cnt;
     418           0 :           ulong prv_raw_sz  = /* prv->cnc_diag_ha_filt_sz  + */ prv->fseq_diag_tot_sz;
     419             : 
     420           0 :           PRINT( " | " ); printf_rate( &buf, &buf_sz, 1e9, 0., cur_raw_cnt,             prv_raw_cnt,             dt );
     421           0 :           PRINT( " | " ); printf_rate( &buf, &buf_sz, 8e9, 0., cur_raw_sz,              prv_raw_sz,              dt ); /* Assumes sz incl framing */
     422           0 :           PRINT( " | " ); printf_rate( &buf, &buf_sz, 1e9, 0., cur->fseq_diag_tot_cnt,  prv->fseq_diag_tot_cnt,  dt );
     423           0 :           PRINT( " | " ); printf_rate( &buf, &buf_sz, 8e9, 0., cur->fseq_diag_tot_sz,   prv->fseq_diag_tot_sz,   dt ); /* Assumes sz incl framing */
     424             : 
     425           0 :           PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_tot_cnt,  prv->fseq_diag_tot_cnt, 0.,
     426           0 :                                       cur_raw_cnt,             prv_raw_cnt,            DBL_MIN );
     427           0 :           PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_tot_sz,   prv->fseq_diag_tot_sz,  0.,
     428           0 :                                       cur_raw_sz,              prv_raw_sz,             DBL_MIN ); /* Assumes sz incl framing */
     429           0 :           PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_filt_cnt, prv->fseq_diag_filt_cnt, 0.,
     430           0 :                                       cur->fseq_diag_tot_cnt,  prv->fseq_diag_tot_cnt,  DBL_MIN );
     431           0 :           PRINT( " | " ); printf_pct ( &buf, &buf_sz, cur->fseq_diag_filt_sz,  prv->fseq_diag_filt_sz, 0.,
     432           0 :                                       cur->fseq_diag_tot_sz,   prv->fseq_diag_tot_sz,  DBL_MIN ); /* Assumes sz incl framing */
     433             : 
     434           0 :           PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_ovrnp_cnt, prv->fseq_diag_ovrnp_cnt );
     435           0 :           PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_ovrnr_cnt, prv->fseq_diag_ovrnr_cnt );
     436           0 :           PRINT( " | " ); printf_err_cnt( &buf, &buf_sz, cur->fseq_diag_slow_cnt,  prv->fseq_diag_slow_cnt  );
     437           0 :           PRINT( " | " ); printf_seq(     &buf, &buf_sz, cur->mcache_seq,          prv->mcache_seq  );
     438           0 :           PRINT( TEXT_NEWLINE );
     439           0 :           link_idx++;
     440           0 :         }
     441           0 :       }
     442           0 :     }
     443           0 :     if( FD_UNLIKELY( with_sankey ) ) {
     444             :       /* We only need to count from one of the benchs, since they both receive
     445             :          all of the transactions. */
     446           0 :       fd_topo_tile_t const * benchs = &topo->tiles[ fd_topo_find_tile( topo, "benchs", 0UL ) ];
     447           0 :       ulong fseq_sum = 0UL;
     448           0 :       for( ulong i=0UL; i<benchs->in_cnt; i++ ) {
     449           0 :         ulong const * fseq = benchs->in_link_fseq[ i ];
     450           0 :         fseq_sum += fd_fseq_query( fseq );
     451           0 :       }
     452             : 
     453           0 :       ulong net_tile_idx = fd_topo_find_tile( topo, "net", 0UL );
     454           0 :       if( FD_UNLIKELY( net_tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "net tile not found" ));
     455             : 
     456           0 :       fd_topo_tile_t const * net = &topo->tiles[ net_tile_idx ];
     457           0 :       ulong net_sent = fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ net->out_link_id[ 0 ] ].mcache ) );
     458           0 :       net_sent      += fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ net->out_link_id[ 1 ] ].mcache ) );
     459           0 :       net_sent = fseq_sum;
     460             : 
     461           0 :       ulong verify_failed  = 0UL;
     462           0 :       ulong verify_sent    = 0UL;
     463           0 :       ulong verify_overrun = 0UL;
     464           0 :       for( ulong i=0UL; i<config->layout.verify_tile_count; i++ ) {
     465           0 :         fd_topo_tile_t const * verify = &topo->tiles[ fd_topo_find_tile( topo, "verify", i ) ];
     466           0 :         verify_overrun += fd_metrics_link_in( verify->metrics, 0UL )[ FD_METRICS_COUNTER_LINK_OVERRUN_POLLING_FRAG_COUNT_OFF ] / config->layout.verify_tile_count;
     467           0 :         verify_failed += fd_metrics_link_in( verify->metrics, 0UL )[ FD_METRICS_COUNTER_LINK_FILTERED_COUNT_OFF ];
     468           0 :         verify_sent += fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ verify->out_link_id[ 0 ] ].mcache ) );
     469           0 :       }
     470             : 
     471           0 :       fd_topo_tile_t const * dedup = &topo->tiles[ fd_topo_find_tile( topo, "dedup", 0UL ) ];
     472           0 :       ulong dedup_failed = 0UL;
     473           0 :       for( ulong i=0UL; i<config->layout.verify_tile_count; i++) {
     474           0 :         dedup_failed += fd_metrics_link_in( dedup->metrics, i )[ FD_METRICS_COUNTER_LINK_FILTERED_COUNT_OFF ];
     475           0 :       }
     476           0 :       ulong dedup_sent = fd_mcache_seq_query( fd_mcache_seq_laddr( topo->links[ dedup->out_link_id[ 0 ] ].mcache ) );
     477             : 
     478           0 :       fd_topo_tile_t const * pack = &topo->tiles[ fd_topo_find_tile( topo, "pack", 0UL ) ];
     479           0 :       volatile ulong * pack_metrics = fd_metrics_tile( pack->metrics );
     480           0 :       ulong pack_invalid = pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_WRITE_SYSVAR_OFF ] +
     481           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_ESTIMATION_FAIL_OFF ] +
     482           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_TOO_LARGE_OFF ] +
     483           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_EXPIRED_OFF ] +
     484           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_ADDR_LUT_OFF ] +
     485           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_UNAFFORDABLE_OFF ] +
     486           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_DUPLICATE_OFF ] +
     487           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_PRIORITY_OFF ] +
     488           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_NONVOTE_REPLACE_OFF ] +
     489           0 :                            pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_INSERTED_VOTE_REPLACE_OFF ];
     490           0 :       ulong pack_overrun = pack_metrics[ FD_METRICS_COUNTER_PACK_TRANSACTION_DROPPED_FROM_EXTRA_OFF ];
     491           0 :       ulong pack_sent = pack_metrics[ FD_METRICS_HISTOGRAM_PACK_TOTAL_TRANSACTIONS_PER_MICROBLOCK_COUNT_OFF + FD_HISTF_BUCKET_CNT ];
     492             : 
     493           0 :       static ulong last_fseq_sum;
     494           0 :       static ulong last_net_sent;
     495           0 :       static ulong last_verify_overrun;
     496           0 :       static ulong last_verify_failed;
     497           0 :       static ulong last_verify_sent;
     498           0 :       static ulong last_dedup_failed;
     499           0 :       static ulong last_dedup_sent;
     500           0 :       static ulong last_pack_overrun;
     501           0 :       static ulong last_pack_invalid;
     502           0 :       static ulong last_pack_sent;
     503             : 
     504           0 :       PRINT( "TXNS SENT:      %-10lu" TEXT_NEWLINE, fseq_sum );
     505           0 :       PRINT( "NET TXNS SENT:  %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, net_sent,       100.0 * (double)net_sent/(double)fseq_sum,        100.0 * (double)(net_sent - last_net_sent)/(double)(fseq_sum - last_fseq_sum)               );
     506           0 :       PRINT( "VERIFY OVERRUN: %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, verify_overrun, 100.0 * (double)verify_overrun/(double)net_sent,  100.0 * (double)(verify_overrun - last_verify_overrun)/(double)(net_sent - last_net_sent)   );
     507           0 :       PRINT( "VERIFY FAILED:  %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, verify_failed,  100.0 * (double)verify_failed/(double)net_sent,   100.0 * (double)(verify_failed - last_verify_failed)/(double)(net_sent - last_net_sent)     );
     508           0 :       PRINT( "VERIFY SENT:    %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, verify_sent,    100.0 * (double)verify_sent/(double)net_sent,     100.0 * (double)(verify_sent - last_verify_sent)/(double)(net_sent - last_net_sent)         );
     509           0 :       PRINT( "DEDUP FAILED:   %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, dedup_failed,   100.0 * (double)dedup_failed/(double)verify_sent, 100.0 * (double)(dedup_failed - last_dedup_failed)/(double)(verify_sent - last_verify_sent) );
     510           0 :       PRINT( "DEDUP SENT:     %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, dedup_sent,     100.0 * (double)dedup_sent/(double)verify_sent,   100.0 * (double)(dedup_sent - last_dedup_sent)/(double)(verify_sent - last_verify_sent)     );
     511           0 :       PRINT( "PACK OVERRUN:   %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, pack_overrun,   100.0 * (double)pack_overrun/(double)dedup_sent,  100.0 * (double)(pack_overrun - last_pack_overrun)/(double)(dedup_sent - last_dedup_sent)   );
     512           0 :       PRINT( "PACK INVALID:   %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, pack_invalid,   100.0 * (double)pack_invalid/(double)dedup_sent,  100.0 * (double)(pack_invalid - last_pack_invalid)/(double)(dedup_sent - last_dedup_sent)   );
     513           0 :       PRINT( "PACK SENT:      %-10lu %-5.2lf%%  %-5.2lf%%" TEXT_NEWLINE, pack_sent,      100.0 * (double)pack_sent/(double)dedup_sent,     100.0 * (double)(pack_sent - last_pack_sent)/(double)(dedup_sent - last_dedup_sent)         );
     514             : 
     515           0 :       last_fseq_sum = fseq_sum;
     516           0 :       last_net_sent = net_sent;
     517           0 :       last_verify_overrun = verify_overrun;
     518           0 :       last_verify_failed = verify_failed;
     519           0 :       last_verify_sent = verify_sent;
     520           0 :       last_dedup_failed = dedup_failed;
     521           0 :       last_dedup_sent = dedup_sent;
     522           0 :       last_pack_overrun = pack_overrun;
     523           0 :       last_pack_invalid = pack_invalid;
     524           0 :       last_pack_sent = pack_sent;
     525           0 :     }
     526             : 
     527             :     /* write entire monitor output buffer */
     528           0 :     write_stdout( buffer, sizeof(buffer) - buf_sz );
     529             : 
     530           0 :     if( FD_UNLIKELY( stop1 || (now-stop)>=0L ) ) {
     531             :       /* Stop once we've been monitoring for duration ns */
     532           0 :       break;
     533           0 :     }
     534             : 
     535           0 :     then = now; tic = toc;
     536           0 :     tile_snap_t * tmp = tile_snap_prv; tile_snap_prv = tile_snap_cur; tile_snap_cur = tmp;
     537           0 :     link_snap_t * tmp2 = link_snap_prv; link_snap_prv = link_snap_cur; link_snap_cur = tmp2;
     538           0 :   }
     539           0 : }
     540             : /* signal1 is the handler that monitor_cmd_fn installs for SIGTERM and SIGINT; it ends the process with exit status 0. */
     541             : static void
     542           0 : signal1( int sig ) {
     543           0 :   (void)sig; /* the signal number is not used */
     544           0 :   exit( 0 ); /* gracefully exit. NOTE(review): exit() is not on the POSIX async-signal-safe list; confirm calling it from a handler is intended (e.g. to run exit-time cleanup) */
     545           0 : }
     546             : /* monitor_cmd_fn is the body of the monitor command (see fd_action_monitor.fn): it installs signal handlers, builds the fd allow-list and seccomp filter, sandboxes the process (or just switches uid/gid), then calls run_monitor and exits with status 0 rather than returning. */
     547             : void
     548             : monitor_cmd_fn( args_t *   args,
     549           0 :                 config_t * config ) {
     550           0 :   if( FD_UNLIKELY( args->monitor.with_bench ) ) { /* with_bench: extend config->topo via add_bench_topo before the workspaces are joined */
     551           0 :     add_bench_topo( &config->topo,
     552           0 :                     config->development.bench.affinity,
     553           0 :                     config->development.bench.benchg_tile_count,
     554           0 :                     config->development.bench.benchs_tile_count,
     555           0 :                     0UL, /* NOTE(review): the literal arguments from here on are positional; check add_bench_topo's prototype in bench.h for what each one selects */
     556           0 :                     0,
     557           0 :                     0.0f,
     558           0 :                     0.0f,
     559           0 :                     0UL,
     560           0 :                     0,
     561           0 :                     0U,
     562           0 :                     0,
     563           0 :                     0U,
     564           0 :                     1,
     565           0 :                     !config->is_firedancer );
     566           0 :   }
     567             : 
     568           0 :   struct sigaction sa = { /* one sigaction, registered for both SIGTERM and SIGINT below */
     569           0 :     .sa_handler = signal1,
     570           0 :     .sa_flags   = 0,
     571           0 :   };
     572           0 :   if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) )
     573           0 :     FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     574           0 :   if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) )
     575           0 :     FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     576             : 
     577           0 :   int allow_fds[ 5 ]; /* at most 5 entries: stdin, stdout, stderr, optional logfile fd, optional drain fd */
     578           0 :   ulong allow_fds_cnt = 0;
     579           0 :   allow_fds[ allow_fds_cnt++ ] = 0; /* stdin */
     580           0 :   allow_fds[ allow_fds_cnt++ ] = 1; /* stdout */
     581           0 :   allow_fds[ allow_fds_cnt++ ] = 2; /* stderr */
     582           0 :   if( FD_LIKELY( fd_log_private_logfile_fd()!=-1 ) ) /* -1 is treated as "no logfile fd" */
     583           0 :     allow_fds[ allow_fds_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
     584           0 :   if( FD_UNLIKELY( args->monitor.drain_output_fd!=-1 ) )
     585           0 :     allow_fds[ allow_fds_cnt++ ] = args->monitor.drain_output_fd; /* maybe we are interposing firedancer log output with the monitor */
     586             : 
     587           0 :   fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY ); /* the join is requested in read-only mode */
     588             : 
     589           0 :   struct sock_filter seccomp_filter[ 128UL ]; /* 128 is also the capacity passed to populate_sock_filter_policy_monitor below */
     590           0 :   uint drain_output_fd = args->monitor.drain_output_fd >= 0 ? (uint)args->monitor.drain_output_fd : (uint)-1; /* a negative drain fd is handed to the filter as (uint)-1 */
     591           0 :   populate_sock_filter_policy_monitor( 128UL, seccomp_filter, (uint)fd_log_private_logfile_fd(), drain_output_fd ); /* NOTE(review): the logfile fd is cast unconditionally, so a -1 result also becomes (uint)-1 here; confirm the policy expects that */
     592             : 
     593           0 :   if( FD_UNLIKELY( close( config->log.lock_fd ) ) ) FD_LOG_ERR(( "close() failed (%i-%s)", errno, fd_io_strerror( errno ) )); /* lock_fd is not in allow_fds and is closed before the sandbox / uid switch below */
     594             : 
     595           0 :   if( FD_LIKELY( config->development.sandbox ) ) { /* sandboxed path: hands over the fd allow-list and seccomp filter built above */
     596           0 :     fd_sandbox_enter( config->uid,
     597           0 :                       config->gid,
     598           0 :                       0,
     599           0 :                       0,
     600           0 :                       0,
     601           0 :                       1, /* Keep controlling terminal for main so it can receive Ctrl+C */
     602           0 :                       0,
     603           0 :                       0UL,
     604           0 :                       0UL,
     605           0 :                       0UL,
     606           0 :                       allow_fds_cnt,
     607           0 :                       allow_fds,
     608           0 :                       sock_filter_policy_monitor_instr_cnt,
     609           0 :                       seccomp_filter );
     610           0 :   } else { /* sandbox disabled: call fd_sandbox_switch_uid_gid with the configured uid/gid instead */
     611           0 :     fd_sandbox_switch_uid_gid( config->uid, config->gid );
     612           0 :   }
     613             : 
     614           0 :   fd_topo_fill( &config->topo ); /* NOTE(review): runs after the sandbox / uid switch; presumably resolves topology pointers inside the joined workspaces — confirm */
     615             : 
     616           0 :   run_monitor( config, /* forwards the settings stored in args->monitor */
     617           0 :                args->monitor.drain_output_fd,
     618           0 :                args->monitor.with_sankey,
     619           0 :                args->monitor.dt_min,
     620           0 :                args->monitor.dt_max,
     621           0 :                args->monitor.duration,
     622           0 :                args->monitor.seed,
     623           0 :                args->monitor.ns_per_tic );
     624             : 
     625           0 :   exit( 0 ); /* gracefully exit */
     626           0 : }
     627             : /* Descriptor for the "monitor" action: gives the command name and description and points at monitor_cmd_args, monitor_cmd_fn and monitor_cmd_perm. */
     628             : action_t fd_action_monitor = {
     629             :   .name           = "monitor",
     630             :   .args           = monitor_cmd_args, /* strips the monitor options (e.g. --dt-min, --dt-max, --duration) from the command line */
     631             :   .fn             = monitor_cmd_fn,
     632             :   .require_config = 1, /* NOTE(review): presumably means a loaded config is required before .fn runs; confirm against action_t */
     633             :   .perm           = monitor_cmd_perm, /* defined outside this view; presumably the permission / capability check for the command — confirm */
     634             :   .description    = "Monitor a locally running Firedancer instance with a terminal GUI",
     635             : };

Generated by: LCOV version 1.14