Line data Source code
1 : #define _GNU_SOURCE
2 : #include "../../shared/fd_config.h"
3 : #include "../../shared/fd_action.h"
4 : #include "../../platform/fd_sys_util.h"
5 : #include "../../../disco/metrics/fd_metrics.h"
6 : #include "../../../util/pod/fd_pod.h"
7 :
8 : #include <errno.h>
9 : #include <stdio.h>
10 : #include <stdlib.h>
11 : #include <unistd.h>
12 : #include <sys/wait.h>
13 : #include <sys/random.h>
14 :
/* record_pid holds the return value of the fork() that spawns
   `perf record`.  It is zero until that fork() happens.  It is read
   from the asynchronous signal handler in this file, so it is declared
   volatile to keep the compiler from caching it across the handler
   boundary. */

static volatile int record_pid;
16 :
17 : static void
18 0 : parent_signal( int sig ) {
19 0 : FD_LOG_NOTICE(( "Received signal %s\n", fd_io_strsignal( sig ) ));
20 0 : if( FD_LIKELY( record_pid ) ) {
21 0 : if( FD_UNLIKELY( -1==kill( record_pid, SIGINT ) ) ) FD_LOG_ERR(( "kill() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
22 0 : }
23 0 : }
24 :
25 : static void
26 0 : install_parent_signals( void ) {
27 0 : struct sigaction sa = {
28 0 : .sa_handler = parent_signal,
29 0 : .sa_flags = 0,
30 0 : };
31 0 : if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) )
32 0 : FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
33 0 : if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) )
34 0 : FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
35 0 : }
36 :
37 : void
38 : flame_cmd_perm( args_t * args FD_PARAM_UNUSED,
39 : fd_cap_chk_t * chk,
40 0 : config_t const * config FD_PARAM_UNUSED ) {
41 0 : fd_cap_chk_root( chk, "flame", "read system performance counters with `/usr/bin/perf`" );
42 0 : }
43 :
44 : void
45 : flame_cmd_args( int * pargc,
46 : char *** pargv,
47 0 : args_t * args ) {
48 :
49 0 : if( FD_UNLIKELY( !*pargc ) ) FD_LOG_ERR(( "usage: flame [all|tile|tile:idx|agave]" ));
50 0 : strncpy( args->flame.name, **pargv, sizeof( args->flame.name ) - 1 );
51 :
52 0 : (*pargc)--;
53 0 : (*pargv)++;
54 0 : }
55 :
56 : void
57 : flame_cmd_fn( args_t * args,
58 0 : config_t * config ) {
59 0 : install_parent_signals();
60 :
61 0 : fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY );
62 0 : fd_topo_fill( &config->topo );
63 :
64 0 : int sandbox = fd_pod_query_int( config->topo.props, "sandbox", 0 );
65 0 : if( FD_UNLIKELY( sandbox ) ) {
66 0 : FD_LOG_WARNING(( "flame command will not resolve symbols correctly when "
67 0 : "Firedancer is running sandboxed, and all stacks will "
68 0 : "show as [unknown]" ));
69 0 : }
70 :
71 0 : ulong tile_cnt = 0UL;
72 0 : ulong tile_idxs[ 128UL ];
73 :
74 0 : int whole_process = 0;
75 0 : if( FD_UNLIKELY( !strcmp( "all", args->flame.name ) ) ) {
76 0 : FD_TEST( config->topo.tile_cnt<sizeof(tile_idxs)/sizeof(tile_idxs[0]) );
77 0 : for( ulong i=0UL; i<config->topo.tile_cnt; i++ ) {
78 0 : tile_idxs[ tile_cnt ] = i;
79 0 : tile_cnt++;
80 0 : }
81 0 : } else if( FD_UNLIKELY( !strcmp( "agave", args->flame.name ) ) ) {
82 : /* Find the bank tile so we can get the Agave PID */
83 0 : ulong bank_tile_idx = fd_topo_find_tile( &config->topo, "bank", 0UL );
84 0 : if( FD_UNLIKELY( bank_tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `bank` not found" ));
85 0 : whole_process = 1;
86 0 : tile_idxs[ 0 ] = bank_tile_idx;
87 0 : tile_cnt = 1UL;
88 0 : } else {
89 0 : char * sep = strchr( args->flame.name, ':' );
90 :
91 0 : ulong tile_idx;
92 0 : if( FD_UNLIKELY( !sep ) ) {
93 0 : tile_idx = fd_topo_find_tile( &config->topo, args->flame.name, 0UL );
94 0 : } else {
95 0 : char * endptr;
96 0 : *sep = '\0';
97 0 : ulong kind_id = strtoul( sep+1, &endptr, 10 );
98 0 : if( FD_UNLIKELY( *endptr!='\0' || kind_id==ULONG_MAX ) ) FD_LOG_ERR(( "invalid tile kind id provided `%s`", sep+1 ));
99 0 : tile_idx = fd_topo_find_tile( &config->topo, args->flame.name, kind_id );
100 0 : }
101 :
102 0 : if( FD_UNLIKELY( tile_idx==ULONG_MAX ) ) FD_LOG_ERR(( "tile `%s` not found", args->flame.name ));
103 0 : tile_idxs[ 0 ] = tile_idx;
104 0 : tile_cnt = 1UL;
105 0 : }
106 :
107 0 : char threads[ 4096 ] = {0};
108 0 : ulong len = 0UL;
109 0 : for( ulong i=0UL; i<tile_cnt; i++ ) {
110 0 : ulong tid = fd_metrics_tile( config->topo.tiles[ tile_idxs[ i ] ].metrics )[ FD_METRICS_GAUGE_TILE_TID_OFF ];
111 0 : ulong pid = fd_metrics_tile( config->topo.tiles[ tile_idxs[ i ] ].metrics )[ FD_METRICS_GAUGE_TILE_PID_OFF ];
112 :
113 0 : FD_TEST( pid<=INT_MAX );
114 0 : if( FD_UNLIKELY( -1==kill( (int)tid, 0 ) ) ) {
115 0 : if( FD_LIKELY( config->topo.tiles[ i ].allow_shutdown ) ) continue;
116 :
117 0 : if( FD_UNLIKELY( errno==ESRCH ) ) FD_LOG_ERR(( "tile %s:%lu is not running", config->topo.tiles[ i ].name, config->topo.tiles[ i ].kind_id ));
118 0 : else FD_LOG_ERR(( "kill() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
119 0 : }
120 :
121 0 : ulong arg_len;
122 0 : FD_TEST( fd_cstr_printf_check( threads+len, sizeof(threads)-len, &arg_len, "%lu", fd_ulong_if( whole_process, pid, tid ) ) );
123 0 : len += arg_len;
124 :
125 0 : if( FD_LIKELY( i!=tile_cnt-1UL ) ) {
126 0 : FD_TEST( fd_cstr_printf_check( threads+len, sizeof(threads)-len, NULL, "," ) );
127 0 : len += 1UL;
128 0 : }
129 0 : }
130 0 : FD_TEST( len<sizeof(threads) );
131 :
132 0 : FD_LOG_NOTICE(( "/usr/bin/perf script record flamegraph -F 99 -%c %s && /usr/bin/perf script report flamegraph", fd_char_if( whole_process, 'p', 't' ), threads ));
133 :
134 0 : record_pid = fork();
135 0 : if( FD_UNLIKELY( -1==record_pid ) ) FD_LOG_ERR(( "fork() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
136 0 : if( FD_LIKELY( !record_pid ) ) {
137 0 : char * args[ 11 ] = {
138 0 : "/usr/bin/perf",
139 0 : "record",
140 0 : "-g",
141 0 : "-F",
142 0 : "99",
143 0 : whole_process ? "-p" : "-t",
144 0 : threads,
145 0 : NULL,
146 0 : };
147 0 : if( FD_UNLIKELY( -1==execve( "/usr/bin/perf", (char * const *)args, NULL ) ) ) FD_LOG_ERR(( "execve() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
148 0 : }
149 :
150 0 : FD_LOG_NOTICE(( "Perf collection running. Send SIGINT / Crl+C to stop." ));
151 :
152 0 : for(;;) {
153 0 : int wstatus;
154 0 : int exited_pid = waitpid( -1, &wstatus, 0 );
155 0 : if( FD_UNLIKELY( -1==exited_pid ) ) {
156 0 : if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) continue;
157 0 : FD_LOG_ERR(( "waitpid() failed (%d-%s)", errno, fd_io_strerror( errno ) ));
158 0 : }
159 :
160 0 : int graceful_exit = !WIFEXITED( wstatus ) && WTERMSIG( wstatus )==SIGINT;
161 0 : if( FD_UNLIKELY( !graceful_exit ) ) {
162 0 : if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) FD_LOG_ERR(( "perf record exited unexpectedly with signal %d (%s)", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
163 0 : if( FD_UNLIKELY( WEXITSTATUS( wstatus ) ) ) FD_LOG_ERR(( "perf record exited unexpectedly with code %d", WEXITSTATUS( wstatus ) ));
164 0 : }
165 0 : break;
166 0 : }
167 :
168 0 : int report_pid = fork();
169 0 : if( FD_UNLIKELY( -1==report_pid ) ) FD_LOG_ERR(( "fork() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
170 0 : if( FD_LIKELY( !report_pid ) ) {
171 0 : char * args[ 7 ] = {
172 0 : "/usr/bin/perf",
173 0 : "script",
174 0 : "report",
175 0 : "flamegraph",
176 0 : NULL,
177 0 : };
178 0 : if( FD_UNLIKELY( -1==execve( "/usr/bin/perf", (char * const *)args, NULL ) ) ) FD_LOG_ERR(( "execve() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
179 0 : }
180 :
181 0 : for(;;) {
182 0 : int wstatus;
183 0 : int exited_pid = waitpid( -1, &wstatus, 0 );
184 0 : if( FD_UNLIKELY( -1==exited_pid ) ) {
185 0 : if( FD_LIKELY( errno==EAGAIN || errno==EINTR ) ) continue;
186 0 : FD_LOG_ERR(( "waitpid() failed (%d-%s)", errno, fd_io_strerror( errno ) ));
187 0 : }
188 :
189 0 : if( FD_UNLIKELY( !WIFEXITED( wstatus ) ) ) FD_LOG_ERR(( "perf report exited unexpectedly with signal %d (%s)", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
190 0 : if( FD_UNLIKELY( WEXITSTATUS( wstatus ) ) ) FD_LOG_ERR(( "perf report exited unexpectedly with code %d", WEXITSTATUS( wstatus ) ));
191 0 : break;
192 0 : }
193 :
194 0 : fd_sys_util_exit_group( 0 );
195 0 : }
196 :
197 : action_t fd_action_flame = {
198 : .name = "flame",
199 : .args = flame_cmd_args,
200 : .fn = flame_cmd_fn,
201 : .perm = flame_cmd_perm,
202 : .description = "Capture a perf flamegraph",
203 : .is_diagnostic = 1
204 : };
|