LCOV - code coverage report
Current view: top level - app/shared_dev/commands - flame.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 136 0.0 %
Date: 2025-08-05 05:04:49 Functions: 0 5 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "../../shared/fd_config.h"
       3             : #include "../../shared/fd_action.h"
       4             : #include "../../platform/fd_sys_util.h"
       5             : #include "../../../disco/metrics/fd_metrics.h"
       6             : 
       7             : #include <errno.h>
       8             : #include <stdio.h>
       9             : #include <stdlib.h>
      10             : #include <unistd.h>
      11             : #include <sys/wait.h>
      12             : #include <sys/random.h>
      13             : 
      14             : static int record_pid;
      15             : 
      16             : static void
      17           0 : parent_signal( int sig ) {
      18           0 :   FD_LOG_NOTICE(( "Received signal %s\n", fd_io_strsignal( sig ) ));
      19           0 :   if( FD_LIKELY( record_pid ) ) {
      20           0 :     if( FD_UNLIKELY( -1==kill( record_pid, SIGINT ) ) ) FD_LOG_ERR(( "kill() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      21           0 :   }
      22           0 : }
      23             : 
      24             : static void
      25           0 : install_parent_signals( void ) {
      26           0 :   struct sigaction sa = {
      27           0 :     .sa_handler = parent_signal,
      28           0 :     .sa_flags   = 0,
      29           0 :   };
      30           0 :   if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) )
      31           0 :     FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      32           0 :   if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) )
      33           0 :     FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      34           0 : }
      35             : 
      36             : void
      37             : flame_cmd_perm( args_t *         args   FD_PARAM_UNUSED,
      38             :                 fd_cap_chk_t *   chk,
      39           0 :                 config_t const * config FD_PARAM_UNUSED ) {
      40           0 :   fd_cap_chk_root( chk, "flame", "read system performance counters with `/usr/bin/perf`" );
      41           0 : }
      42             : 
      43             : void
      44             : flame_cmd_args( int *    pargc,
      45             :                 char *** pargv,
      46           0 :                 args_t * args ) {
      47             : 
      48           0 :   if( FD_UNLIKELY( !*pargc ) ) FD_LOG_ERR(( "usage: flame [all|tile|tile:idx|agave]" ));
      49           0 :   strncpy( args->flame.name, **pargv, sizeof( args->flame.name ) - 1 );
      50             : 
      51           0 :   (*pargc)--;
      52           0 :   (*pargv)++;
      53           0 : }
      54             : 
      55             : void
      56             : flame_cmd_fn( args_t *   args,
      57           0 :               config_t * config ) {
      58           0 :   install_parent_signals();
      59             : 
      60           0 :   fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY );
      61           0 :   fd_topo_fill( &config->topo );
      62             : 
      63           0 :   ulong tile_cnt = 0UL;
      64           0 :   ulong tile_idxs[ 128UL ];
      65             : 
      66           0 :   int whole_process = 0;
      67           0 :   if( FD_UNLIKELY( !strcmp( "all", args->flame.name ) ) ) {
      68           0 :     FD_TEST( config->topo.tile_cnt<sizeof(tile_idxs)/sizeof(tile_idxs[0]) );
      69           0 :     for( ulong i=0UL; i<config->topo.tile_cnt; i++ ) {
      70           0 :       tile_idxs[ tile_cnt ] = i;
      71           0 :       tile_cnt++;
      72           0 :     }
      73           0 :   } else if( FD_UNLIKELY( !strcmp( "agave", args->flame.name ) ) ) {
      74             :     /* Find the bank tile so we can get the Agave PID */
      75           0 :     ulong bank_tile_idx = fd_topo_find_tile( &config->topo, "bank", 0UL );
      76           0 :     if( FD_UNLIKELY( bank_tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `bank` not found" ));
      77           0 :     whole_process = 1;
      78           0 :     tile_idxs[ 0 ] = bank_tile_idx;
      79           0 :     tile_cnt = 1UL;
      80           0 :   } else {
      81           0 :     char * sep = strchr( args->flame.name, ':' );
      82             : 
      83           0 :     ulong tile_idx;
      84           0 :     if( FD_UNLIKELY( !sep ) ) {
      85           0 :       tile_idx = fd_topo_find_tile( &config->topo, args->flame.name, 0UL );
      86           0 :     } else {
      87           0 :       char * endptr;
      88           0 :       *sep = '\0';
      89           0 :       ulong kind_id = strtoul( sep+1, &endptr, 10 );
      90           0 :       if( FD_UNLIKELY( *endptr!='\0' || kind_id==ULONG_MAX ) ) FD_LOG_ERR(( "invalid tile kind id provided `%s`", sep+1 ));
      91           0 :       tile_idx = fd_topo_find_tile( &config->topo, args->flame.name, kind_id );
      92           0 :     }
      93             : 
      94           0 :     if( FD_UNLIKELY( tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `%s` not found", args->flame.name ));
      95           0 :     tile_idxs[ 0 ] = tile_idx;
      96           0 :     tile_cnt = 1UL;
      97           0 :   }
      98             : 
      99           0 :   char threads[ 4096 ] = {0};
     100           0 :   ulong len = 0UL;
     101           0 :   for( ulong i=0UL; i<tile_cnt; i++ ) {
     102           0 :     if( FD_LIKELY( i!=0UL ) ) {
     103           0 :       FD_TEST( fd_cstr_printf_check( threads+len, sizeof(threads)-len, NULL, "," ) );
     104           0 :       len += 1UL;
     105           0 :     }
     106             : 
     107           0 :     ulong tid = fd_metrics_tile( config->topo.tiles[ tile_idxs[ i ] ].metrics )[ FD_METRICS_GAUGE_TILE_TID_OFF ];
     108           0 :     ulong pid = fd_metrics_tile( config->topo.tiles[ tile_idxs[ i ] ].metrics )[ FD_METRICS_GAUGE_TILE_PID_OFF ];
     109             : 
     110           0 :     FD_TEST( pid<=INT_MAX );
     111           0 :     if( FD_UNLIKELY( -1==kill( (int)pid, 0 ) ) ) {
     112           0 :       if( FD_UNLIKELY( errno==ESRCH ) ) FD_LOG_ERR(( "tile %s:%lu is not running", config->topo.tiles[ i ].name, config->topo.tiles[ i ].kind_id ));
     113           0 :       else                              FD_LOG_ERR(( "kill() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     114           0 :     }
     115             : 
     116           0 :     ulong arg_len;
     117           0 :     FD_TEST( fd_cstr_printf_check( threads+len, sizeof(threads)-len, &arg_len, "%lu", fd_ulong_if( whole_process, pid, tid ) ) );
     118           0 :     len += arg_len;
     119           0 :   }
     120           0 :   FD_TEST( len<sizeof(threads) );
     121             : 
     122           0 :   FD_LOG_NOTICE(( "/usr/bin/perf script record flamegraph -F 99 -%c %s && /usr/bin/perf script report flamegraph", fd_char_if( whole_process, 'p', 't' ), threads ));
     123             : 
     124           0 :   record_pid = fork();
     125           0 :   if( FD_UNLIKELY( -1==record_pid ) ) FD_LOG_ERR(( "fork() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     126           0 :   if( FD_LIKELY( !record_pid ) ) {
     127           0 :     char * args[ 11 ] = {
     128           0 :       "/usr/bin/perf",
     129           0 :       "script",
     130           0 :       "record",
     131           0 :       "flamegraph",
     132           0 :       "-F",
     133           0 :       "99",
     134           0 :       whole_process ? "-p" : "-t",
     135           0 :       threads,
     136           0 :       NULL,
     137           0 :     };
     138           0 :     if( FD_UNLIKELY( -1==execve( "/usr/bin/perf", (char * const *)args, NULL ) ) ) FD_LOG_ERR(( "execve() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     139           0 :   }
     140             : 
     141           0 :   FD_LOG_NOTICE(( "Perf collection running. Send SIGINT / Crl+C to stop." ));
     142             : 
     143           0 :   for(;;) {
     144           0 :     int wstatus;
     145           0 :     int exited_pid = waitpid( -1, &wstatus, 0 );
     146           0 :     if( FD_UNLIKELY( -1==exited_pid ) ) {
     147           0 :       if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) continue;
     148           0 :       FD_LOG_ERR(( "waitpid() failed (%d-%s)", errno, fd_io_strerror( errno ) ));
     149           0 :     }
     150             : 
     151           0 :     int graceful_exit = !WIFEXITED( wstatus ) && WTERMSIG( wstatus )==SIGINT;
     152           0 :     if( FD_UNLIKELY( !graceful_exit ) ) {
     153           0 :       if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) FD_LOG_ERR(( "perf record exited unexpectedly with signal %d (%s)", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
     154           0 :       if( FD_UNLIKELY( WEXITSTATUS( wstatus ) ) ) FD_LOG_ERR(( "perf record exited unexpectedly with code %d", WEXITSTATUS( wstatus ) ));
     155           0 :     }
     156           0 :     break;
     157           0 :   }
     158             : 
     159           0 :   int report_pid = fork();
     160           0 :   if( FD_UNLIKELY( -1==report_pid ) ) FD_LOG_ERR(( "fork() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     161           0 :   if( FD_LIKELY( !report_pid ) ) {
     162           0 :     char * args[ 7 ] = {
     163           0 :       "/usr/bin/perf",
     164           0 :       "script",
     165           0 :       "report",
     166           0 :       "flamegraph",
     167           0 :       NULL,
     168           0 :     };
     169           0 :     if( FD_UNLIKELY( -1==execve( "/usr/bin/perf", (char * const *)args, NULL ) ) ) FD_LOG_ERR(( "execve() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     170           0 :   }
     171             : 
     172           0 :   for(;;) {
     173           0 :     int wstatus;
     174           0 :     int exited_pid = waitpid( -1, &wstatus, 0 );
     175           0 :     if( FD_UNLIKELY( -1==exited_pid ) ) {
     176           0 :       if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) continue;
     177           0 :       FD_LOG_ERR(( "waitpid() failed (%d-%s)", errno, fd_io_strerror( errno ) ));
     178           0 :     }
     179             : 
     180           0 :     if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) FD_LOG_ERR(( "perf report exited unexpectedly with signal %d (%s)", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
     181           0 :     if( FD_UNLIKELY( WEXITSTATUS( wstatus ) ) ) FD_LOG_ERR(( "perf report exited unexpectedly with code %d", WEXITSTATUS( wstatus ) ));
     182           0 :     break;
     183           0 :   }
     184             : 
     185           0 :   fd_sys_util_exit_group( 0 );
     186           0 : }
     187             : 
     188             : action_t fd_action_flame = {
     189             :   .name          = "flame",
     190             :   .args          = flame_cmd_args,
     191             :   .fn            = flame_cmd_fn,
     192             :   .perm          = flame_cmd_perm,
     193             :   .description   = "Capture a perf flamegraph",
     194             :   .is_diagnostic = 1
     195             : };

Generated by: LCOV version 1.14