Line data Source code
1 : #include "../../shared/fd_config.h"
2 : #include "../../shared/fd_action.h"
3 : #include "../../../disco/metrics/fd_metrics.h"
4 :
5 : #include <errno.h>
6 : #include <signal.h>
7 : #include <stdio.h>
8 : #include <stdlib.h>
9 : #include <unistd.h>
10 :
11 : extern action_t * ACTIONS[];
12 :
/* running is the keep-going flag of the sampling loop.  It is cleared
   by exit_signal, which is installed as the SIGTERM / SIGINT handler.
   Because it is stored to from an asynchronous signal handler and read
   by the interrupted code, it is declared volatile sig_atomic_t, the
   object type the C standard guarantees is safe for that purpose. */

static volatile sig_atomic_t running = 1;

/* exit_signal is the signal handler.  It only requests that the
   sampling loop stop; sig is ignored. */

static void
exit_signal( int sig ) {
  (void)sig;
  running = 0;
}
19 :
20 : static void
21 : metrics_record_cmd_args( int * pargc,
22 : char *** pargv,
23 0 : args_t * args ) {
24 :
25 0 : if( fd_env_strip_cmdline_contains( pargc, pargv, "--help" ) ||
26 0 : fd_env_strip_cmdline_contains( pargc, pargv, "-h" ) ||
27 0 : fd_env_strip_cmdline_contains( pargc, pargv, "help" ) ) {
28 0 : fputs(
29 0 : "\nUsage: firedancer-dev metrics-record [GLOBAL FLAGS] [FLAGS] metric0 metric1 ... metricN\n"
30 0 : "\n"
31 0 : "Flags:\n"
32 0 : " --topo TOPO Attach to metrics of non-standard topo, such as snapshot-load\n"
33 0 : " --interval SECONDS How frequently to print a row. Defaults to 1.0 seconds.\n"
34 0 : "\n"
35 0 : "Metrics:\n"
36 0 : " Selector format: `metric_name[,tile_kind[,tile_kind_id]]`\n"
37 0 : "\n"
38 0 : " Metrics are primarily identified by their name string. A tile kind string can also\n"
39 0 : " be given to limit the given metric to only one specific tile type. Similarly, a\n"
40 0 : " tile kind id can be given (only if tile_kind is also given) to limit to a particular\n"
41 0 : " tile instance. If these tile kind filters are not given, all matching metrics will\n"
42 0 : " be recorded.\n"
43 0 : "\n"
44 0 : " Examples:\n"
45 0 : " tile_pid\n"
46 0 : " tile_backpressure_count,gossip\n"
47 0 : " tile_status,net,1\n"
48 0 : "\n",
49 0 : stderr );
50 0 : exit( EXIT_SUCCESS );
51 0 : }
52 :
53 0 : fd_memset( &args->metrics_record, 0, sizeof(args->metrics_record) );
54 0 : fd_cstr_ncpy( args->metrics_record.topo, fd_env_strip_cmdline_cstr( pargc, pargv, "--topo", NULL, "" ), sizeof(args->metrics_record.topo) );
55 :
56 0 : float _interval = fd_env_strip_cmdline_float( pargc, pargv, "--interval", NULL, 1.0f );
57 0 : args->metrics_record.interval_ns = fd_ulong_max( 1UL, (ulong)(_interval*1.0e9f) );
58 :
59 0 : ulong const selectors_cnt_max = sizeof(args->metrics_record.selectors)/sizeof(args->metrics_record.selectors[0]);
60 0 : while( *pargc ) {
61 0 : if( FD_UNLIKELY( args->metrics_record.selectors_cnt>=selectors_cnt_max ) ) FD_LOG_ERR(( "too many metric selectors given %lu", selectors_cnt_max ));
62 0 : struct fd_action_metrics_record_selector * selector = &args->metrics_record.selectors[ args->metrics_record.selectors_cnt++ ];
63 :
64 0 : char * name = *pargv[ 0 ];
65 0 : char * kind = strchr( name, ',' );
66 0 : char * kind_id = NULL;
67 0 : if( kind!=NULL ) {
68 0 : fd_cstr_fini( kind );
69 0 : kind += 1;
70 0 : kind_id = strchr( kind, ',' );
71 0 : if( kind_id!=NULL ) {
72 0 : fd_cstr_fini( kind_id );
73 0 : kind_id += 1;
74 0 : if( FD_UNLIKELY( NULL!=strchr( kind_id, ',' ) ) ) FD_LOG_ERR(( "invalid metric selector %s %s %s", name, kind, kind_id ));
75 0 : }
76 0 : }
77 0 : *pargc -= 1;
78 0 : *pargv += 1;
79 :
80 0 : if( FD_UNLIKELY( NULL==name || strlen( name )>=sizeof(selector->name)) ) FD_LOG_ERR(( "invalid metric selector name %s", name ));
81 0 : fd_cstr_ncpy( selector->name, name, sizeof(selector->name) );
82 0 : if( FD_UNLIKELY( NULL!=kind && strlen( kind )>=sizeof(selector->kind)) ) FD_LOG_ERR(( "invalid metric selector kind %s", kind ));
83 0 : fd_cstr_ncpy( selector->kind, kind, sizeof(selector->kind) );
84 0 : selector->kind_id = NULL==kind_id ? ULONG_MAX : fd_cstr_to_ulong( kind_id );
85 0 : }
86 0 : }
87 :
88 : static int
89 : selector_matches( struct fd_action_metrics_record_selector const * selector,
90 : char const * metric_name,
91 : char const * tile_name,
92 0 : ulong tile_id ) {
93 0 : if( 0!=strcmp( metric_name, selector->name ) ) return 0;
94 0 : if( selector->kind[ 0 ] && 0!=strcmp( tile_name, selector->kind ) ) return 0;
95 0 : if( ULONG_MAX!=selector->kind_id && tile_id!=selector->kind_id ) return 0;
96 0 : return 1;
97 0 : }
98 :
99 : static void
100 : reconstruct_topo( fd_config_t * config,
101 0 : char const * topo_name ) {
102 0 : if( !topo_name[0] ) return; /* keep default action topo */
103 :
104 0 : action_t const * selected = NULL;
105 0 : for( action_t ** a=ACTIONS; a!=NULL; a++ ) {
106 0 : action_t const * action = *a;
107 0 : if( 0==strcmp( action->name, topo_name ) ) {
108 0 : selected = action;
109 0 : break;
110 0 : }
111 0 : }
112 :
113 0 : if( !selected ) FD_LOG_ERR(( "Unknown --topo %s", topo_name ));
114 0 : if( !selected->topo ) FD_LOG_ERR(( "Cannot recover topology for --topo %s", topo_name ));
115 :
116 0 : selected->topo( config );
117 0 : }
118 :
119 : static void
120 : metrics_record_cmd_fn( args_t * args,
121 0 : fd_config_t * config ) {
122 :
123 0 : struct sigaction sa = { .sa_handler = exit_signal };
124 0 : if( FD_UNLIKELY( sigaction( SIGTERM, &sa, NULL ) ) ) FD_LOG_ERR(( "sigaction(SIGTERM) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
125 0 : if( FD_UNLIKELY( sigaction( SIGINT, &sa, NULL ) ) ) FD_LOG_ERR(( "sigaction(SIGINT) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
126 :
127 0 : reconstruct_topo( config, args->metrics_record.topo );
128 :
129 0 : fd_topo_join_workspaces( &config->topo, FD_SHMEM_JOIN_MODE_READ_ONLY );
130 0 : fd_topo_fill( &config->topo );
131 :
132 0 : uchar write_buf[ 4096 ];
133 0 : fd_io_buffered_ostream_t out[1];
134 0 : FD_TEST( out==fd_io_buffered_ostream_init( out, STDOUT_FILENO, write_buf, sizeof(write_buf) ) );
135 :
136 0 : fd_io_buffered_ostream_write( out, "timestamp", 9 );
137 :
138 0 : ulong metrics_cnt = 0UL;
139 0 : struct {
140 0 : fd_metrics_meta_t const * meta;
141 0 : volatile ulong const * value;
142 0 : } metrics[ 4096 ];
143 :
144 0 : for( ulong i=0UL; i<FD_METRICS_ALL_TOTAL; i++ ) {
145 0 : fd_metrics_meta_t const * metric = &FD_METRICS_ALL[ i ];
146 0 : if( metric->type!=FD_METRICS_TYPE_GAUGE && metric->type!=FD_METRICS_TYPE_COUNTER ) continue;
147 0 : for( ulong j=0UL; j<config->topo.tile_cnt; j++ ) {
148 0 : fd_topo_tile_t const * tile = &config->topo.tiles[ j ];
149 0 : char const * tile_name = tile->metrics_name[ 0 ] ? tile->metrics_name : tile->name;
150 0 : for( ulong s=0UL; s<args->metrics_record.selectors_cnt; s++ ) {
151 0 : if( FD_LIKELY( !selector_matches( &args->metrics_record.selectors[ s ], metric->name, tile_name, tile->kind_id ) ) ) continue;
152 0 : if( FD_UNLIKELY( metrics_cnt>=(sizeof(metrics)/sizeof(metrics[0])) ) ) FD_LOG_ERR(( "too many metrics %lu", metrics_cnt ));
153 0 : metrics[ metrics_cnt ].meta = metric;
154 0 : metrics[ metrics_cnt ].value = fd_metrics_tile( tile->metrics ) + metric->offset;
155 0 : ++metrics_cnt;
156 :
157 0 : char buf[ 1024 ];
158 0 : char * p = fd_cstr_append_printf( fd_cstr_init( buf ), ",%s{kind=%s kind_id=%lu", metric->name, tile->name, tile->kind_id );
159 0 : if( metric->enum_name ) p = fd_cstr_append_printf( p, " %s=%s", metric->enum_name, metric->enum_variant );
160 0 : p = fd_cstr_append_char( p, '}' );
161 0 : fd_io_buffered_ostream_write( out, buf, (ulong)(p-buf) );
162 0 : break;
163 0 : }
164 0 : }
165 0 : }
166 :
167 : /* TODO: Add support for in/out link metrics */
168 :
169 0 : for( ulong i=0UL; i<FD_METRICS_TILE_KIND_CNT; i++ ) {
170 0 : for( ulong j=0UL; j<FD_METRICS_TILE_KIND_SIZES[ i ]; j++ ) {
171 0 : fd_metrics_meta_t const * metric = &FD_METRICS_TILE_KIND_METRICS[ i ][ j ];
172 0 : if( metric->type!=FD_METRICS_TYPE_GAUGE && metric->type!=FD_METRICS_TYPE_COUNTER ) continue;
173 0 : for( ulong k=0UL; k<config->topo.tile_cnt; k++ ) {
174 0 : fd_topo_tile_t const * tile = &config->topo.tiles[ k ];
175 0 : char const * tile_name = tile->metrics_name[ 0 ] ? tile->metrics_name : tile->name;
176 0 : if( 0!=strcmp( tile_name, FD_METRICS_TILE_KIND_NAMES[ i ] ) ) continue;
177 0 : for( ulong s=0UL; s<args->metrics_record.selectors_cnt; s++ ) {
178 0 : if( FD_LIKELY( !selector_matches( &args->metrics_record.selectors[ s ], metric->name, tile_name, tile->kind_id ) ) ) continue;
179 0 : if( FD_UNLIKELY( metrics_cnt>=(sizeof(metrics)/sizeof(metrics[0])) ) ) FD_LOG_ERR(( "too many metrics %lu", metrics_cnt ));
180 0 : metrics[ metrics_cnt ].meta = metric;
181 0 : metrics[ metrics_cnt ].value = fd_metrics_tile( tile->metrics ) + metric->offset;
182 0 : ++metrics_cnt;
183 :
184 0 : char buf[ 1024 ];
185 0 : char * p = fd_cstr_append_printf( fd_cstr_init( buf ), ",%s{kind=%s kind_id=%lu", metric->name, tile->name, tile->kind_id );
186 0 : if( metric->enum_name ) p = fd_cstr_append_printf( p, " %s=%s", metric->enum_name, metric->enum_variant );
187 0 : p = fd_cstr_append_char( p, '}' );
188 0 : fd_io_buffered_ostream_write( out, buf, (ulong)(p-buf) );
189 0 : break;
190 0 : }
191 0 : }
192 0 : }
193 0 : }
194 :
195 0 : if( FD_UNLIKELY( metrics_cnt==0UL ) ) FD_LOG_ERR(( "no matching metrics found" ));
196 0 : fd_io_buffered_ostream_write( out, "\n", 1 );
197 0 : fd_io_buffered_ostream_flush( out );
198 :
199 0 : ulong count = 0UL, skip = 0UL;
200 0 : long const start = fd_log_wallclock();
201 0 : long const interval = (long)args->metrics_record.interval_ns;
202 0 : long next = ((start/interval)*interval)+interval;
203 0 : while( running ) {
204 0 : long now = fd_log_wait_until( next );
205 0 : for( next+=interval; next<=now; next+=interval ) skip++;
206 :
207 0 : char * const b = fd_io_buffered_ostream_peek( out );
208 0 : char * const e = b + fd_io_buffered_ostream_peek_sz( out );
209 0 : char * p = b;
210 0 : if( FD_UNLIKELY( e-p<=20L ) ) FD_LOG_ERR(( "increase write buffer size" ));
211 0 : p = fd_cstr_append_ulong_as_text( p, ' ', '\0', (ulong)now, fd_ulong_base10_dig_cnt( (ulong)now ) );
212 :
213 0 : for( ulong i=0UL; i<metrics_cnt; i++ ) {
214 0 : ulong value = *metrics[ i ].value;
215 0 : switch( metrics[ i ].meta->converter ) {
216 0 : case FD_METRICS_CONVERTER_NANOSECONDS: value = fd_metrics_convert_ticks_to_nanoseconds( value ); break;
217 0 : case FD_METRICS_CONVERTER_SECONDS: value = (ulong)(fd_metrics_convert_ticks_to_seconds( value ) + 0.5); /* round, not truncate */ break;
218 0 : case FD_METRICS_CONVERTER_NONE: break;
219 0 : default: FD_LOG_ERR(( "unknown converter %i", metrics[ i ].meta->converter ));
220 0 : }
221 0 : if( FD_UNLIKELY( e-p<=22L ) ) FD_LOG_ERR(( "increase write buffer size" ));
222 0 : p = fd_cstr_append_char( p, ',' );
223 0 : p = fd_cstr_append_ulong_as_text( p, ' ', '\0', value, fd_ulong_base10_dig_cnt( value ) );
224 0 : }
225 0 : p = fd_cstr_append_char( p, '\n' );
226 0 : fd_io_buffered_ostream_seek( out, (ulong)(p-b) );
227 0 : fd_io_buffered_ostream_flush( out );
228 0 : count++;
229 0 : }
230 :
231 0 : FD_LOG_NOTICE(( "recorded %lu samples in %f seconds", count, (double)(fd_log_wallclock()-start)/1.0e9 ));
232 0 : if( skip ) FD_LOG_WARNING(( "skipped %lu samples, try reducing metric count or increasing interval", skip ));
233 :
234 0 : fd_io_buffered_ostream_flush( out );
235 0 : fd_io_buffered_ostream_fini( out );
236 :
237 0 : fd_topo_leave_workspaces( &config->topo );
238 0 : }
239 :
240 : action_t fd_action_metrics_record = {
241 : .name = "metrics-record",
242 : .description = "Continuously print a select subset of metrics to STDOUT in CSV format",
243 : .is_diagnostic = 1,
244 : .args = metrics_record_cmd_args,
245 : .fn = metrics_record_cmd_fn,
246 : };
|