LCOV - code coverage report
Current view: top level - app/shared_dev/commands - flame.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 136 0.0 %
Date: 2025-07-01 05:00:49 Functions: 0 5 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "../../shared/fd_config.h"
       3             : #include "../../shared/fd_action.h"
       4             : #include "../../platform/fd_sys_util.h"
       5             : #include "../../../disco/metrics/fd_metrics.h"
       6             : 
       7             : #include <errno.h>
       8             : #include <stdio.h>
       9             : #include <unistd.h>
      10             : #include <sys/wait.h>
      11             : #include <sys/random.h>
      12             : 
      13             : static int record_pid;
      14             : 
      15             : static void
      16           0 : parent_signal( int sig ) {
      17           0 :   FD_LOG_NOTICE(( "Received signal %s\n", fd_io_strsignal( sig ) ));
      18           0 :   if( FD_LIKELY( record_pid ) ) {
      19           0 :     if( FD_UNLIKELY( -1==kill( record_pid, SIGINT ) ) ) FD_LOG_ERR(( "kill() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      20           0 :   }
      21           0 : }
      22             : 
      23             : static void
      24           0 : install_parent_signals( void ) {
      25           0 :   struct sigaction sa = {
      26           0 :     .sa_handler = parent_signal,
      27           0 :     .sa_flags   = 0,
      28           0 :   };
      29           0 :   if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) )
      30           0 :     FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      31           0 :   if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) )
      32           0 :     FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      33           0 : }
      34             : 
      35             : void
      36             : flame_cmd_perm( args_t *         args   FD_PARAM_UNUSED,
      37             :                 fd_cap_chk_t *   chk,
      38           0 :                 config_t const * config FD_PARAM_UNUSED ) {
      39           0 :   fd_cap_chk_root( chk, "flame", "read system performance counters with `/usr/bin/perf`" );
      40           0 : }
      41             : 
      42             : void
      43             : flame_cmd_args( int *    pargc,
      44             :                 char *** pargv,
      45           0 :                 args_t * args ) {
      46             : 
      47           0 :   if( FD_UNLIKELY( !*pargc ) ) FD_LOG_ERR(( "usage: flame [all|tile|tile:idx|agave]" ));
      48           0 :   strncpy( args->flame.name, **pargv, sizeof( args->flame.name ) - 1 );
      49             : 
      50           0 :   (*pargc)--;
      51           0 :   (*pargv)++;
      52           0 : }
      53             : 
      54             : void
      55             : flame_cmd_fn( args_t *   args,
      56           0 :               config_t * config ) {
      57           0 :   install_parent_signals();
      58             : 
      59           0 :   fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY );
      60           0 :   fd_topo_fill( &config->topo );
      61             : 
      62           0 :   ulong tile_cnt = 0UL;
      63           0 :   ulong tile_idxs[ 128UL ];
      64             : 
      65           0 :   int whole_process = 0;
      66           0 :   if( FD_UNLIKELY( !strcmp( "all", args->flame.name ) ) ) {
      67           0 :     FD_TEST( config->topo.tile_cnt<sizeof(tile_idxs)/sizeof(tile_idxs[0]) );
      68           0 :     for( ulong i=0UL; i<config->topo.tile_cnt; i++ ) {
      69           0 :       tile_idxs[ tile_cnt ] = i;
      70           0 :       tile_cnt++;
      71           0 :     }
      72           0 :   } else if( FD_UNLIKELY( !strcmp( "agave", args->flame.name ) ) ) {
      73             :     /* Find the bank tile so we can get the Agave PID */
      74           0 :     ulong bank_tile_idx = fd_topo_find_tile( &config->topo, "bank", 0UL );
      75           0 :     if( FD_UNLIKELY( bank_tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `bank` not found" ));
      76           0 :     whole_process = 1;
      77           0 :     tile_idxs[ 0 ] = bank_tile_idx;
      78           0 :     tile_cnt = 1UL;
      79           0 :   } else {
      80           0 :     char * sep = strchr( args->flame.name, ':' );
      81             : 
      82           0 :     ulong tile_idx;
      83           0 :     if( FD_UNLIKELY( !sep ) ) {
      84           0 :       tile_idx = fd_topo_find_tile( &config->topo, args->flame.name, 0UL );
      85           0 :     } else {
      86           0 :       char * endptr;
      87           0 :       *sep = '\0';
      88           0 :       ulong kind_id = strtoul( sep+1, &endptr, 10 );
      89           0 :       if( FD_UNLIKELY( *endptr!='\0' || kind_id==ULONG_MAX ) ) FD_LOG_ERR(( "invalid tile kind id provided `%s`", sep+1 ));
      90           0 :       tile_idx = fd_topo_find_tile( &config->topo, args->flame.name, kind_id );
      91           0 :     }
      92             : 
      93           0 :     if( FD_UNLIKELY( tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `%s` not found", args->flame.name ));
      94           0 :     tile_idxs[ 0 ] = tile_idx;
      95           0 :     tile_cnt = 1UL;
      96           0 :   }
      97             : 
      98           0 :   char threads[ 4096 ] = {0};
      99           0 :   ulong len = 0UL;
     100           0 :   for( ulong i=0UL; i<tile_cnt; i++ ) {
     101           0 :     if( FD_LIKELY( i!=0UL ) ) {
     102           0 :       FD_TEST( fd_cstr_printf_check( threads+len, sizeof(threads)-len, NULL, "," ) );
     103           0 :       len += 1UL;
     104           0 :     }
     105             : 
     106           0 :     ulong tid = fd_metrics_tile( config->topo.tiles[ tile_idxs[ i ] ].metrics )[ FD_METRICS_GAUGE_TILE_TID_OFF ];
     107           0 :     ulong pid = fd_metrics_tile( config->topo.tiles[ tile_idxs[ i ] ].metrics )[ FD_METRICS_GAUGE_TILE_PID_OFF ];
     108             : 
     109           0 :     FD_TEST( pid<=INT_MAX );
     110           0 :     if( FD_UNLIKELY( -1==kill( (int)pid, 0 ) ) ) {
     111           0 :       if( FD_UNLIKELY( errno==ESRCH ) ) FD_LOG_ERR(( "tile %s:%lu is not running", config->topo.tiles[ i ].name, config->topo.tiles[ i ].kind_id ));
     112           0 :       else                              FD_LOG_ERR(( "kill() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     113           0 :     }
     114             : 
     115           0 :     ulong arg_len;
     116           0 :     FD_TEST( fd_cstr_printf_check( threads+len, sizeof(threads)-len, &arg_len, "%lu", fd_ulong_if( whole_process, pid, tid ) ) );
     117           0 :     len += arg_len;
     118           0 :   }
     119           0 :   FD_TEST( len<sizeof(threads) );
     120             : 
     121           0 :   FD_LOG_NOTICE(( "/usr/bin/perf script record flamegraph -F 99 -%c %s && /usr/bin/perf script report flamegraph", fd_char_if( whole_process, 'p', 't' ), threads ));
     122             : 
     123           0 :   record_pid = fork();
     124           0 :   if( FD_UNLIKELY( -1==record_pid ) ) FD_LOG_ERR(( "fork() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     125           0 :   if( FD_LIKELY( !record_pid ) ) {
     126           0 :     char * args[ 11 ] = {
     127           0 :       "/usr/bin/perf",
     128           0 :       "script",
     129           0 :       "record",
     130           0 :       "flamegraph",
     131           0 :       "-F",
     132           0 :       "99",
     133           0 :       whole_process ? "-p" : "-t",
     134           0 :       threads,
     135           0 :       NULL,
     136           0 :     };
     137           0 :     if( FD_UNLIKELY( -1==execve( "/usr/bin/perf", (char * const *)args, NULL ) ) ) FD_LOG_ERR(( "execve() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     138           0 :   }
     139             : 
     140           0 :   FD_LOG_NOTICE(( "Perf collection running. Send SIGINT / Crl+C to stop." ));
     141             : 
     142           0 :   for(;;) {
     143           0 :     int wstatus;
     144           0 :     int exited_pid = waitpid( -1, &wstatus, 0 );
     145           0 :     if( FD_UNLIKELY( -1==exited_pid ) ) {
     146           0 :       if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) continue;
     147           0 :       FD_LOG_ERR(( "waitpid() failed (%d-%s)", errno, fd_io_strerror( errno ) ));
     148           0 :     }
     149             : 
     150           0 :     int graceful_exit = !WIFEXITED( wstatus ) && WTERMSIG( wstatus )==SIGINT;
     151           0 :     if( FD_UNLIKELY( !graceful_exit ) ) {
     152           0 :       if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) FD_LOG_ERR(( "perf record exited unexpectedly with signal %d (%s)", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
     153           0 :       if( FD_UNLIKELY( WEXITSTATUS( wstatus ) ) ) FD_LOG_ERR(( "perf record exited unexpectedly with code %d", WEXITSTATUS( wstatus ) ));
     154           0 :     }
     155           0 :     break;
     156           0 :   }
     157             : 
     158           0 :   int report_pid = fork();
     159           0 :   if( FD_UNLIKELY( -1==report_pid ) ) FD_LOG_ERR(( "fork() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     160           0 :   if( FD_LIKELY( !report_pid ) ) {
     161           0 :     char * args[ 7 ] = {
     162           0 :       "/usr/bin/perf",
     163           0 :       "script",
     164           0 :       "report",
     165           0 :       "flamegraph",
     166           0 :       NULL,
     167           0 :     };
     168           0 :     if( FD_UNLIKELY( -1==execve( "/usr/bin/perf", (char * const *)args, NULL ) ) ) FD_LOG_ERR(( "execve() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     169           0 :   }
     170             : 
     171           0 :   for(;;) {
     172           0 :     int wstatus;
     173           0 :     int exited_pid = waitpid( -1, &wstatus, 0 );
     174           0 :     if( FD_UNLIKELY( -1==exited_pid ) ) {
     175           0 :       if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) continue;
     176           0 :       FD_LOG_ERR(( "waitpid() failed (%d-%s)", errno, fd_io_strerror( errno ) ));
     177           0 :     }
     178             : 
     179           0 :     if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) FD_LOG_ERR(( "perf report exited unexpectedly with signal %d (%s)", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
     180           0 :     if( FD_UNLIKELY( WEXITSTATUS( wstatus ) ) ) FD_LOG_ERR(( "perf report exited unexpectedly with code %d", WEXITSTATUS( wstatus ) ));
     181           0 :     break;
     182           0 :   }
     183             : 
     184           0 :   fd_sys_util_exit_group( 0 );
     185           0 : }
     186             : 
     187             : action_t fd_action_flame = {
     188             :   .name          = "flame",
     189             :   .args          = flame_cmd_args,
     190             :   .fn            = flame_cmd_fn,
     191             :   .perm          = flame_cmd_perm,
     192             :   .description   = "Capture a perf flamegraph",
     193             :   .is_diagnostic = 1
     194             : };

Generated by: LCOV version 1.14