Line data Source code
1 : #define _GNU_SOURCE
2 : #include "../../shared/commands/run/run.h"
3 :
4 : #include "../../../util/tile/fd_tile_private.h"
5 :
6 : #include <unistd.h>
7 : #include <stdlib.h>
8 : #include <errno.h>
9 : #include <pthread.h>
10 : #include <sys/wait.h>
11 :
12 : #define NAME "run-agave"
13 :
14 : fd_topo_run_tile_t
15 : fdctl_tile_run( fd_topo_tile_t const * tile );
16 :
17 : extern void fd_ext_validator_main( const char ** args );
18 :
19 : extern int * fd_log_private_shared_lock;
20 :
21 : static void
22 0 : clone_labs_memory_space_tiles( config_t * config ) {
23 : /* preload shared memory for all the agave tiles at once */
24 0 : for( ulong i=0; i<config->topo.wksp_cnt; i++ ) {
25 0 : fd_topo_wksp_t * wksp = &config->topo.workspaces[ i ];
26 0 : if( FD_LIKELY( !strcmp( wksp->name, "pack_bank" ) ||
27 0 : !strcmp( wksp->name, "shred_store" ) ) ) {
28 0 : fd_topo_join_workspace( &config->topo, wksp, FD_SHMEM_JOIN_MODE_READ_ONLY );
29 0 : } else if( FD_LIKELY( !strcmp( wksp->name, "bank_poh" ) ||
30 0 : !strcmp( wksp->name, "bank_pack" ) ||
31 0 : !strcmp( wksp->name, "bank_busy" ) ||
32 0 : !strcmp( wksp->name, "poh_shred" ) ||
33 0 : !strcmp( wksp->name, "gossip_dedup" ) ||
34 0 : !strcmp( wksp->name, "stake_out" ) ||
35 0 : !strcmp( wksp->name, "metric_in" ) ||
36 0 : !strcmp( wksp->name, "bank" ) ||
37 0 : !strcmp( wksp->name, "poh" ) ||
38 0 : !strcmp( wksp->name, "store" ) ) ) {
39 0 : fd_topo_join_workspace( &config->topo, wksp, FD_SHMEM_JOIN_MODE_READ_WRITE );
40 0 : }
41 0 : }
42 :
43 0 : fd_topo_run_single_process( &config->topo, 1, config->uid, config->gid, fdctl_tile_run );
44 0 : }
45 :
/* Development / bench flags.  They are written by agave_boot from the
   configuration before fd_ext_validator_main is called, and exposed
   through the read-only accessors below. */

static int _fd_ext_larger_max_cost_per_block;
static int _fd_ext_larger_shred_limits_per_block;
static int _fd_ext_disable_status_cache;

/* Returns the current value of the larger_max_cost_per_block flag. */

int
fd_ext_larger_max_cost_per_block( void ) {
  return _fd_ext_larger_max_cost_per_block;
}

/* Returns the current value of the larger_shred_limits_per_block flag. */

int
fd_ext_larger_shred_limits_per_block( void ) {
  return _fd_ext_larger_shred_limits_per_block;
}

/* Returns the current value of the disable_status_cache flag. */

int
fd_ext_disable_status_cache( void ) {
  return _fd_ext_disable_status_cache;
}
51 :
52 : void
53 0 : agave_boot( config_t const * config ) {
54 0 : uint idx = 0;
55 0 : char const * argv[ 128 ];
56 0 : uint bufidx = 0;
57 0 : char buffer[ 32 ][ 16 ];
58 0 : #define ADD1( arg ) do { argv[ idx++ ] = arg; } while( 0 )
59 0 : #define ADD( arg, val ) do { argv[ idx++ ] = arg; argv[ idx++ ] = val; } while( 0 )
60 0 : #define ADDU( arg, val ) do { argv[ idx++ ] = arg; FD_TEST( fd_cstr_printf_check( buffer[ bufidx ], 16, NULL, "%u", val ) ); argv[ idx++ ] = buffer[ bufidx++ ]; } while( 0 )
61 0 : #define ADDH( arg, val ) do { argv[ idx++ ] = arg; FD_TEST( fd_cstr_printf_check( buffer[ bufidx ], 16, NULL, "%hu", val ) ); argv[ idx++ ] = buffer[ bufidx++ ]; } while( 0 )
62 :
63 0 : ADD1( "fdctl" );
64 0 : ADD( "--log", "-" );
65 :
66 : /* net */
67 0 : if( FD_UNLIKELY( strcmp( config->frankendancer.dynamic_port_range, "" ) ) )
68 0 : ADD( "--dynamic-port-range", config->frankendancer.dynamic_port_range );
69 :
70 0 : if( strcmp( config->net.bind_address, "" ) )
71 0 : ADD( "--bind-address", config->net.bind_address );
72 0 : ADDU( "--firedancer-tpu-port", config->tiles.quic.regular_transaction_listen_port );
73 0 : ADDU( "--firedancer-tvu-port", config->tiles.shred.shred_listen_port );
74 :
75 : /* consensus */
76 0 : ADD( "--identity", config->paths.identity_key );
77 0 : if( strcmp( config->paths.vote_account, "" ) )
78 0 : ADD( "--vote-account", config->paths.vote_account );
79 0 : for( ulong i=0UL; i<config->frankendancer.paths.authorized_voter_paths_cnt; i++ )
80 0 : ADD( "--authorized-voter", config->frankendancer.paths.authorized_voter_paths[ i ] );
81 0 : if( !config->frankendancer.consensus.snapshot_fetch ) ADD1( "--no-snapshot-fetch" );
82 0 : if( !config->frankendancer.consensus.genesis_fetch ) ADD1( "--no-genesis-fetch" );
83 0 : if( !config->frankendancer.consensus.poh_speed_test ) ADD1( "--no-poh-speed-test" );
84 0 : if( strcmp( config->frankendancer.consensus.expected_genesis_hash, "" ) )
85 0 : ADD( "--expected-genesis-hash", config->frankendancer.consensus.expected_genesis_hash );
86 0 : if( config->frankendancer.consensus.wait_for_supermajority_at_slot ) {
87 0 : ADDU( "--wait-for-supermajority", config->frankendancer.consensus.wait_for_supermajority_at_slot );
88 0 : if( strcmp( config->frankendancer.consensus.expected_bank_hash, "" ) )
89 0 : ADD( "--expected-bank-hash", config->frankendancer.consensus.expected_bank_hash );
90 0 : }
91 :
92 0 : if( config->consensus.expected_shred_version )
93 0 : ADDH( "--expected-shred-version", config->consensus.expected_shred_version );
94 0 : if( !config->frankendancer.consensus.wait_for_vote_to_start_leader )
95 0 : ADD1( "--no-wait-for-vote-to-start-leader");
96 0 : for( ulong i=0; i<config->frankendancer.consensus.hard_fork_at_slots_cnt; i++ )
97 0 : ADDU( "--hard-fork", config->frankendancer.consensus.hard_fork_at_slots[ i ] );
98 0 : for( ulong i=0; i<config->frankendancer.consensus.known_validators_cnt; i++ )
99 0 : ADD( "--known-validator", config->frankendancer.consensus.known_validators[ i ] );
100 :
101 0 : ADD( "--snapshot-archive-format", config->frankendancer.ledger.snapshot_archive_format );
102 0 : if( FD_UNLIKELY( config->frankendancer.ledger.require_tower ) ) ADD1( "--require-tower" );
103 :
104 0 : if( FD_UNLIKELY( !config->frankendancer.consensus.os_network_limits_test ) )
105 0 : ADD1( "--no-os-network-limits-test" );
106 :
107 : /* ledger */
108 0 : ADD( "--ledger", config->frankendancer.paths.ledger );
109 0 : ADDU( "--limit-ledger-size", config->frankendancer.ledger.limit_size );
110 0 : if( strcmp( "", config->frankendancer.paths.accounts_path ) )
111 0 : ADD( "--accounts", config->frankendancer.paths.accounts_path );
112 0 : if( strcmp( "", config->frankendancer.ledger.accounts_index_path ) )
113 0 : ADD( "--accounts-index-path", config->frankendancer.ledger.accounts_index_path );
114 0 : if( strcmp( "", config->frankendancer.ledger.accounts_hash_cache_path ) )
115 0 : ADD( "--accounts-hash-cache-path", config->frankendancer.ledger.accounts_hash_cache_path );
116 0 : for( ulong i=0UL; i<config->frankendancer.ledger.account_indexes_cnt; i++ )
117 0 : ADD( "--account-index", config->frankendancer.ledger.account_indexes[ i ] );
118 0 : if( FD_LIKELY( !config->frankendancer.ledger.account_index_include_keys_cnt ) ) {
119 0 : for( ulong i=0UL; i<config->frankendancer.ledger.account_index_exclude_keys_cnt; i++ )
120 0 : ADD( "--account-index-exclude-key", config->frankendancer.ledger.account_index_exclude_keys[ i ] );
121 0 : } else {
122 0 : for( ulong i=0UL; i<config->frankendancer.ledger.account_index_include_keys_cnt; i++ )
123 0 : ADD( "--account-index-include-key", config->frankendancer.ledger.account_index_include_keys[ i ] );
124 0 : }
125 :
126 : /* gossip */
127 0 : for( ulong i=0UL; i<config->gossip.entrypoints_cnt; i++ ) ADD( "--entrypoint", config->gossip.entrypoints[ i ] );
128 0 : if( !config->frankendancer.gossip.port_check ) ADD1( "--no-port-check" );
129 0 : ADDH( "--gossip-port", config->gossip.port );
130 0 : if( config->development.gossip.allow_private_address ) {
131 0 : ADD1( "--allow-private-addr" );
132 0 : }
133 :
134 : /* rpc */
135 0 : if( config->frankendancer.rpc.port ) ADDH( "--rpc-port", config->frankendancer.rpc.port );
136 0 : if( config->frankendancer.rpc.full_api ) ADD1( "--full-rpc-api" );
137 0 : if( config->frankendancer.rpc.private ) ADD1( "--private-rpc" );
138 0 : if( strcmp( config->frankendancer.rpc.public_address, "" ) ) ADD( "--public-rpc-address", config->frankendancer.rpc.public_address );
139 0 : if( strcmp( config->frankendancer.rpc.bind_address, "" ) ) ADD( "--rpc-bind-address", config->frankendancer.rpc.bind_address );
140 0 : if( config->frankendancer.rpc.transaction_history ) ADD1( "--enable-rpc-transaction-history" );
141 0 : if( config->frankendancer.rpc.extended_tx_metadata_storage ) ADD1( "--enable-extended-tx-metadata-storage" );
142 0 : if( config->frankendancer.rpc.only_known ) ADD1( "--only-known-rpc" );
143 0 : if( config->frankendancer.rpc.pubsub_enable_block_subscription ) ADD1( "--rpc-pubsub-enable-block-subscription" );
144 0 : if( config->frankendancer.rpc.pubsub_enable_vote_subscription ) ADD1( "--rpc-pubsub-enable-vote-subscription" );
145 0 : if( config->frankendancer.rpc.bigtable_ledger_storage ) ADD1( "--enable-rpc-bigtable-ledger-storage" );
146 :
147 : /* snapshots */
148 0 : if( !config->frankendancer.snapshots.enabled ) {
149 0 : ADD1( "--no-snapshots" );
150 0 : } else {
151 0 : if( !config->frankendancer.snapshots.incremental_snapshots ) {
152 0 : ADD1( "--no-incremental-snapshots" );
153 0 : ADDU( "--snapshot-interval-slots", config->frankendancer.snapshots.full_snapshot_interval_slots );
154 0 : } else {
155 0 : ADDU( "--full-snapshot-interval-slots", config->frankendancer.snapshots.full_snapshot_interval_slots );
156 0 : ADDU( "--snapshot-interval-slots", config->frankendancer.snapshots.incremental_snapshot_interval_slots );
157 0 : }
158 0 : }
159 0 : ADD( "--snapshots", config->frankendancer.snapshots.path );
160 0 : if( strcmp( "", config->frankendancer.snapshots.incremental_path ) ) ADD( "--incremental-snapshot-archive-path", config->frankendancer.snapshots.incremental_path );
161 0 : ADDU( "--maximum-snapshots-to-retain", config->frankendancer.snapshots.maximum_full_snapshots_to_retain );
162 0 : ADDU( "--maximum-incremental-snapshots-to-retain", config->frankendancer.snapshots.maximum_incremental_snapshots_to_retain );
163 0 : ADDU( "--maximum-snapshot-download-abort", config->frankendancer.snapshots.maximum_snapshot_download_abort );
164 0 : ADDU( "--minimal-snapshot-download-speed", config->frankendancer.snapshots.minimum_snapshot_download_speed );
165 :
166 0 : if( config->frankendancer.layout.agave_unified_scheduler_handler_threads ) {
167 0 : if( FD_UNLIKELY( config->frankendancer.layout.agave_unified_scheduler_handler_threads>config->topo.agave_affinity_cnt ) ) {
168 0 : FD_LOG_ERR(( "Trying to spawn %u handler threads but the agave subprocess has %lu cores. "
169 0 : "Either increase the number of cores in [layout.agave_affinity] or reduce "
170 0 : "the number of threads in [layout.agave_unified_scheduler_handler_threads].",
171 0 : config->frankendancer.layout.agave_unified_scheduler_handler_threads, config->topo.agave_affinity_cnt ));
172 0 : }
173 0 : ADDU( "--unified-scheduler-handler-threads", config->frankendancer.layout.agave_unified_scheduler_handler_threads );
174 0 : } else {
175 : // agave_affinity_cnt >= 8 => agave_affinity_cnt - 4
176 : // 4 <= agave_affinity_cnt < 8 => 4
177 : // agave_affinity_cnt < 4 => agave_affinity_cnt
178 0 : ulong num_threads = fd_ulong_if( config->topo.agave_affinity_cnt>=4UL,
179 0 : fd_ulong_if( config->topo.agave_affinity_cnt>=8, config->topo.agave_affinity_cnt-4UL, 4UL ),
180 0 : config->topo.agave_affinity_cnt );
181 0 : ADDU( "--unified-scheduler-handler-threads", (uint)num_threads );
182 0 : }
183 :
184 0 : argv[ idx ] = NULL;
185 :
186 0 : if( FD_LIKELY( strcmp( config->frankendancer.reporting.solana_metrics_config, "" ) ) ) {
187 0 : if( FD_UNLIKELY( setenv( "SOLANA_METRICS_CONFIG", config->frankendancer.reporting.solana_metrics_config, 1 ) ) )
188 0 : FD_LOG_ERR(( "setenv() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
189 0 : }
190 :
191 0 : FD_LOG_INFO(( "Running Agave validator with the following arguments:" ));
192 0 : for( ulong j=0UL; j<idx; j++ ) FD_LOG_INFO(( "%s", argv[j] ));
193 :
194 0 : FD_CPUSET_DECL( floating_cpu_set );
195 0 : if( FD_UNLIKELY( fd_cpuset_getaffinity( 0, floating_cpu_set ) ) )
196 0 : FD_LOG_ERR(( "sched_getaffinity failed (%i-%s)", errno, fd_io_strerror( errno ) ));
197 :
198 0 : FD_CPUSET_DECL( cpu_set );
199 0 : for( ulong i=0UL; i<config->topo.agave_affinity_cnt; i++ ) {
200 0 : fd_cpuset_insert( cpu_set, config->topo.agave_affinity_cpu_idx[ i ] );
201 0 : }
202 :
203 0 : if( FD_UNLIKELY( fd_cpuset_setaffinity( 0, cpu_set ) ) ) {
204 0 : if( FD_LIKELY( errno==EINVAL ) ) {
205 0 : FD_LOG_ERR(( "Unable to set the affinity for threads created by Agave. It is likely "
206 0 : "that the affinity you have specified for Agave under [layout.agave_affinity] "
207 0 : "in the configuration file contains CPUs which do not exist on this machine." ));
208 0 : } else {
209 0 : FD_LOG_ERR(( "sched_setaffinity failed (%i-%s)", errno, fd_io_strerror( errno ) ));
210 0 : }
211 0 : }
212 :
213 : /* Consensus-breaking development-only CU and/or shred limit increase. */
214 0 : _fd_ext_larger_max_cost_per_block = config->development.bench.larger_max_cost_per_block;
215 0 : _fd_ext_larger_shred_limits_per_block = config->development.bench.larger_shred_limits_per_block;
216 : /* Consensus-breaking bench-only option to disable status cache */
217 0 : _fd_ext_disable_status_cache = config->development.bench.disable_status_cache;
218 0 : FD_COMPILER_MFENCE();
219 :
220 : /* agave_main will exit(1) if it fails, so no return code */
221 0 : fd_ext_validator_main( (const char **)argv );
222 0 : }
223 :
224 : int
225 0 : agave_main( void * args ) {
226 0 : config_t * config = args;
227 :
228 0 : if( FD_UNLIKELY( config->development.debug_tile ) ) {
229 0 : if( FD_UNLIKELY( config->development.debug_tile==UINT_MAX ) ) {
230 0 : FD_LOG_WARNING(( "waiting for debugger to attach to tile agave pid:%lu", fd_sandbox_getpid() ));
231 0 : if( FD_UNLIKELY( -1==kill( getpid(), SIGSTOP ) ) )
232 0 : FD_LOG_ERR(( "kill(SIGSTOP) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
233 0 : fd_log_private_shared_lock[1] = 0;
234 0 : } else {
235 0 : while( FD_LIKELY( fd_log_private_shared_lock[1] ) ) FD_SPIN_PAUSE();
236 0 : }
237 0 : }
238 :
239 0 : clone_labs_memory_space_tiles( config );
240 :
241 0 : ulong pid = fd_sandbox_getpid(); /* Need to read /proc again.. we got a new PID from clone */
242 0 : fd_log_private_tid_set( pid );
243 0 : fd_log_private_stack_discover( FD_TILE_PRIVATE_STACK_SZ,
244 0 : &fd_tile_private_stack0, &fd_tile_private_stack1 );
245 0 : FD_LOG_NOTICE(( "booting agave pid:%lu", fd_log_group_id() ));
246 :
247 0 : fd_sandbox_switch_uid_gid( config->uid, config->gid );
248 :
249 0 : agave_boot( config );
250 0 : return 0;
251 0 : }
252 :
253 : void
254 : run_agave_cmd_fn( args_t * args FD_PARAM_UNUSED,
255 0 : config_t * config ) {
256 0 : fd_log_thread_set( "agave" );
257 :
258 0 : void * stack = create_clone_stack();
259 :
260 : /* Also clone Agave into PID namespaces so it cannot signal
261 : other tile or the parent. */
262 0 : int flags = config->development.sandbox ? CLONE_NEWPID : 0;
263 0 : pid_t clone_pid = clone( agave_main, (uchar *)stack + FD_TILE_PRIVATE_STACK_SZ, flags, config );
264 0 : if( FD_UNLIKELY( clone_pid<0 ) ) FD_LOG_ERR(( "clone() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
265 0 : }
266 :
267 : action_t fd_action_run_agave = {
268 : .name = "run-agave",
269 : .args = NULL,
270 : .fn = run_agave_cmd_fn,
271 : .perm = NULL,
272 : .description = "Start up the Agave side of a Firedancer validator",
273 : };
|