Line data Source code
1 : #define _GNU_SOURCE
2 : #include "run/run.h"
3 :
4 : #include "../../platform/fd_cap_chk.h"
5 : #include "../../../disco/keyguard/fd_keyswitch.h"
6 : #include "../../../disco/keyguard/fd_keyload.h"
7 : #include "../../../tango/fd_tango.h"
8 : #include "../../../util/fd_util.h"
9 :
10 : #include <strings.h>
11 : #include <unistd.h>
12 : #include <sys/resource.h>
13 :
14 : /* The process of switching identity of the validator is somewhat
15 : involved, to prevent it from producing torn data (for example,
16 : a block where half the shreds are signed by one private key, and half
17 : are signed by another).
18 :
19 : The process of switching is a state machine that progresses linearly
20 : through each of the states. Generally, only direct forward steps are
21 : allowed. The one exception is emergency recovery, where an operator
22 : can force the state back to unlocked.
23 :
24 : The states follow, in order. */
25 :
26 : /* State 0: UNLOCKED.
27 : The validator is not currently in the process of switching keys. */
28 0 : #define FD_SET_IDENTITY_STATE_UNLOCKED (0UL)
29 :
30 : /* State 1: LOCKED
31 : Some client to the validator has requested a key switch. To do so,
32 : it acquired an exclusive lock on the validator to prevent the
33 : switch potentially being interleaved with another client. */
34 0 : #define FD_SET_IDENTITY_STATE_LOCKED (1UL)
35 :
36 : /* State 2: POH_HALT_REQUESTED
37 : The first step in the key switch process is to pause the leader
38 : pipeline of the validator, preventing us from becoming leader, but
39 : finishing any currently in progress leader slot if there is one.
40 : While in this state, the validator is waiting for the leader
41 : pipeline to confirm that it has paused production, and is no longer
42 : leader.
43 :
44 : This halt request also causes the PoH tile to switch both:
45 :
46 : (a) The identity key used by the PoH tile itself, used to
47 : determine when this validator is leader in the schedule.
48 :
49 : (b) The key used by the Agave sub-process, if running
50 : Frankendancer. The Agave key is inside a Mutex<> so it is
51 : swapped atomically across all consumers. */
52 0 : #define FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED (2UL)
53 :
54 : /* State 3: POH_HALTED
55 : The PoH tile has confirmed that it has halted the leader pipeline,
56 : and the validator is no longer leader. No more blocks will be
57 : produced until it is unhalted. In addition, the PoH tile has
58 : switched both its own identity key and the Agave key. */
59 0 : #define FD_SET_IDENTITY_STATE_POH_HALTED (3UL)
60 :
61 : /* State 4: SHRED_FLUSH_REQUESTED
62 : Once the leader pipeline is halted, it must be flushed, meaning any
63 : in-flight shreds that could potentially need to be signed with the
64 : old key are signed and sent to the network. This doesn't strictly
65 : need to happen before other tiles have their key flushed, but it
66 : makes the control flow easier to understand if we do this as an
67 : explicit step.
68 :
69 : The shred tile is flushed by telling it the last sequence number
70 : the PoH tile has produced for an outgoing shred, at the time it was
71 : halted, and then waiting for the shred tile to confirm that it has
72 : seen and processed all shreds up to and including that sequence
73 : number.
74 :
75 : In addition to flushing out any in-flight shreds, this also causes
76 : the shred tile to switch the identity key it uses internally, for
77 : determining where this validator is positioned in the Turbine tree. */
78 0 : #define FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED (4UL)
79 :
80 : /* State 5: SHRED_FLUSHED
81 : The shred tile confirms that it has seen and processed all shreds
82 : up to and including the last sequence number produced by the PoH
83 : tile at the time it was halted. The shred tile has also switched
84 : its own identity key when it indicates the flush is complete. */
85 0 : #define FD_SET_IDENTITY_STATE_SHRED_FLUSHED (5UL)
86 :
87 : /* State 6: ALL_SWITCH_REQUESTED
88 : The client now requests that all other tiles which consume the
89 : identity key in some way switch to the new key. The leader
90 : pipeline is still halted, although it doesn't strictly need to be,
91 : since outgoing shreds have been flushed. This is done to keep the
92 : control flow simpler.
93 :
94 : The other tiles using the identity key are:
95 :
96 : (a) Sign. The sign tile is responsible for holding the private
97 : key.
98 : (b) GUI. The GUI shows the validator identity key to the user,
99 : and uses the key to determine which blocks are ours for
100 : highlighting on the frontend.
101 : (c) Event. Outgoing events to the event server are signed with
102 : the identity key to authenticate the sender.
103 : (d) Bundle. The validator must authenticate to any connected
104 : bundle server with the identity key to prove it is on the
105 : leader schedule.
106 : (e) Gossip. The gossip tile sends out ContactInfo messages with
107 : our identity key, and also uses the identity key to sign
108 : outgoing gossip messages. */
109 0 : #define FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED (6UL)
110 :
111 : /* State 7: ALL_SWITCHED
112 : All remaining tiles that use the identity key have confirmed that
113 : they have switched to the new key. The validator is now fully
114 : switched over. */
115 0 : #define FD_SET_IDENTITY_STATE_ALL_SWITCHED (7UL)
116 :
117 : /* State 8: POH_UNHALT_REQUESTED
118 : The final state. Now that all tiles have switched, the leader
119 : pipeline can be unblocked and the validator can resume producing
120 : blocks. Once the PoH tile confirms the leader pipeline is
121 : unhalted, the next state is UNLOCKED. */
122 0 : #define FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED (8UL)
123 :
124 : void
125 : set_identity_cmd_perm( args_t * args FD_PARAM_UNUSED,
126 : fd_cap_chk_t * chk,
127 0 : config_t const * config FD_PARAM_UNUSED ) {
128 : /* 5 huge pages for the key storage area */
129 0 : ulong mlock_limit = 5UL * FD_SHMEM_NORMAL_PAGE_SZ;
130 0 : fd_cap_chk_raise_rlimit( chk, "set-identity", RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" );
131 0 : }
132 :
133 : static fd_keyswitch_t *
134 : find_keyswitch( fd_topo_t const * topo,
135 0 : char const * tile_name ) {
136 0 : ulong tile_idx = fd_topo_find_tile( topo, tile_name, 0UL );
137 0 : FD_TEST( tile_idx!=ULONG_MAX );
138 0 : FD_TEST( topo->tiles[ tile_idx ].keyswitch_obj_id!=ULONG_MAX );
139 :
140 0 : fd_keyswitch_t * keyswitch = fd_topo_obj_laddr( topo, topo->tiles[ tile_idx ].keyswitch_obj_id );
141 0 : FD_TEST( keyswitch );
142 0 : return keyswitch;
143 0 : }
144 :
145 : static void FD_FN_SENSITIVE
146 : poll_keyswitch( fd_topo_t * topo,
147 : ulong * state,
148 : ulong * halted_seq,
149 : uchar * keypair,
150 : int * has_error,
151 : int require_tower,
152 0 : int force_lock ) {
153 0 : switch( *state ) {
154 0 : case FD_SET_IDENTITY_STATE_UNLOCKED: {
155 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
156 0 : if( FD_LIKELY( FD_KEYSWITCH_STATE_UNLOCKED==FD_ATOMIC_CAS( &poh->state, FD_KEYSWITCH_STATE_UNLOCKED, FD_KEYSWITCH_STATE_LOCKED ) ) ) {
157 0 : *state = FD_SET_IDENTITY_STATE_LOCKED;
158 0 : FD_LOG_INFO(( "Locking validator identity for key switch..." ));
159 0 : } else {
160 0 : if( FD_UNLIKELY( force_lock ) ) {
161 0 : *state = FD_SET_IDENTITY_STATE_LOCKED;
162 0 : FD_LOG_WARNING(( "Another process was changing keys, but `--force` supplied. Forcing lock on validator identity for key switch..." ));
163 0 : } else {
164 0 : FD_LOG_ERR(( "Cannot set-identity because Firedancer is already in the process of switching keys. If you are not currently "
165 0 : "changing the identity, it might be because an identity change was abandoned. To recover, run the `set-identity` "
166 0 : "command again with the `--force` argument." ));
167 0 : }
168 0 : }
169 0 : break;
170 0 : }
171 0 : case FD_SET_IDENTITY_STATE_LOCKED: {
172 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
173 0 : memcpy( poh->bytes, keypair, 64UL );
174 0 : poh->param = !!require_tower;
175 0 : FD_COMPILER_MFENCE();
176 0 : poh->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
177 0 : FD_COMPILER_MFENCE();
178 0 : *state = FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED;
179 0 : FD_LOG_INFO(( "Pausing leader pipeline for key switch..." ));
180 0 : break;
181 0 : }
182 0 : case FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED: {
183 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
184 0 : if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
185 0 : explicit_bzero( poh->bytes, 64UL );
186 0 : FD_COMPILER_MFENCE();
187 0 : *halted_seq = poh->result;
188 0 : *state = FD_SET_IDENTITY_STATE_POH_HALTED;
189 0 : FD_LOG_INFO(( "Leader pipeline successfully paused..." ));
190 0 : } else if( FD_UNLIKELY( poh->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
191 0 : FD_SPIN_PAUSE();
192 0 : } else if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_FAILED ) ) {
193 : /* Failed to switch identity in Agave, so abort the entire process. */
194 0 : *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
195 0 : *has_error = 1;
196 0 : } else {
197 0 : FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh->state ));
198 0 : }
199 0 : break;
200 0 : }
201 0 : case FD_SET_IDENTITY_STATE_POH_HALTED: {
202 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
203 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
204 0 : if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
205 :
206 0 : fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
207 0 : FD_TEST( shred );
208 :
209 0 : shred->param = *halted_seq;
210 0 : memcpy( shred->bytes, keypair+32UL, 32UL );
211 0 : FD_COMPILER_MFENCE();
212 0 : shred->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
213 0 : FD_COMPILER_MFENCE();
214 0 : FD_LOG_INFO(( "Flushing in-flight unpublished shreds, must reach seq %lu...", *halted_seq ));
215 0 : }
216 :
217 0 : *state = FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED;
218 0 : break;
219 0 : }
220 0 : case FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED: {
221 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
222 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
223 0 : if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
224 :
225 0 : fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
226 0 : FD_TEST( shred );
227 :
228 0 : if( FD_LIKELY( shred->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
229 0 : continue;
230 0 : } else if( FD_UNLIKELY( shred->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
231 : /* If any of the shred tiles is still pending, we need to wait. */
232 0 : FD_SPIN_PAUSE();
233 0 : return;
234 0 : } else {
235 0 : FD_LOG_ERR(( "Unexpected shred:%lu keyswitch state %lu", tile->kind_id, shred->state ));
236 0 : }
237 0 : }
238 :
239 0 : *state = FD_SET_IDENTITY_STATE_SHRED_FLUSHED;
240 0 : FD_LOG_INFO(( "All in-flight shreds published..." ));
241 0 : break;
242 0 : }
243 0 : case FD_SET_IDENTITY_STATE_SHRED_FLUSHED: {
244 0 : fd_keyswitch_t * sign = find_keyswitch( topo, "sign" );
245 0 : memcpy( sign->bytes, keypair, 64UL );
246 0 : FD_COMPILER_MFENCE();
247 0 : explicit_bzero( keypair, 32UL ); /* Private key no longer needed in this process */
248 0 : FD_COMPILER_MFENCE();
249 0 : sign->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
250 0 : FD_COMPILER_MFENCE();
251 :
252 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
253 0 : if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
254 0 : if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ||
255 0 : !strcmp( topo->tiles[ i ].name, "poh" ) ||
256 0 : !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
257 :
258 0 : fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
259 0 : memcpy( tile_ks->bytes, keypair+32UL, 32UL );
260 0 : FD_COMPILER_MFENCE();
261 0 : tile_ks->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
262 0 : FD_COMPILER_MFENCE();
263 0 : }
264 :
265 0 : FD_LOG_INFO(( "Requesting all tiles switch identity key..." ));
266 0 : *state = FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED;
267 0 : break;
268 0 : }
269 0 : case FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED: {
270 0 : ulong all_switched = 1UL;
271 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
272 0 : if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
273 0 : if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "poh" ) ||
274 0 : !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
275 :
276 0 : fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
277 0 : if( FD_LIKELY( tile_ks->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
278 0 : all_switched = 0UL;
279 0 : break;
280 0 : } else if( FD_UNLIKELY( tile_ks->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
281 0 : if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ) ) {
282 0 : FD_COMPILER_MFENCE();
283 0 : explicit_bzero( tile_ks->bytes, 64UL );
284 0 : FD_COMPILER_MFENCE();
285 0 : }
286 0 : continue;
287 0 : } else {
288 0 : FD_LOG_ERR(( "Unexpected %s keyswitch state %lu", topo->tiles[ i ].name, tile_ks->state ));
289 0 : }
290 0 : }
291 :
292 0 : if( FD_LIKELY( all_switched ) ) {
293 0 : FD_LOG_INFO(( "All tiles successfully switched identity key..." ));
294 0 : *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
295 0 : } else {
296 0 : FD_SPIN_PAUSE();
297 0 : }
298 0 : break;
299 0 : }
300 0 : case FD_SET_IDENTITY_STATE_ALL_SWITCHED: {
301 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
302 0 : poh->state = FD_KEYSWITCH_STATE_UNHALT_PENDING;
303 0 : FD_LOG_INFO(( "Requesting to unpause leader pipeline..." ));
304 0 : *state = FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED;
305 0 : break;
306 0 : }
307 0 : case FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED: {
308 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
309 0 : if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
310 0 : FD_LOG_INFO(( "Leader pipeline unpaused..." ));
311 0 : poh->state = FD_KEYSWITCH_STATE_UNLOCKED;
312 0 : *state = FD_SET_IDENTITY_STATE_UNLOCKED;
313 0 : } else if( FD_UNLIKELY( poh->state==FD_KEYSWITCH_STATE_UNHALT_PENDING ) ) {
314 0 : FD_SPIN_PAUSE();
315 0 : } else {
316 0 : FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh->state ));
317 0 : }
318 0 : break;
319 0 : }
320 0 : }
321 0 : }
322 :
323 : void
324 : set_identity_cmd_args( int * pargc,
325 : char *** pargv,
326 0 : args_t * args) {
327 0 : args->set_identity.require_tower = fd_env_strip_cmdline_contains( pargc, pargv, "--require-tower" );
328 0 : args->set_identity.force = fd_env_strip_cmdline_contains( pargc, pargv, "--force" );
329 :
330 0 : if( FD_UNLIKELY( *pargc<1 ) ) goto err;
331 :
332 0 : char const * path = *pargv[0];
333 0 : (*pargc)--;
334 0 : (*pargv)++;
335 :
336 0 : if( FD_UNLIKELY( !strcmp( path, "-" ) ) ) {
337 0 : args->set_identity.keypair = fd_keyload_alloc_protected_pages( 1UL, 2UL );
338 0 : FD_LOG_STDOUT(( "Reading identity keypair from stdin. Press Ctrl-D when done.\n" ));
339 0 : fd_keyload_read( STDIN_FILENO, "stdin", args->set_identity.keypair );
340 0 : } else {
341 0 : args->set_identity.keypair = fd_keyload_load( path, 0 );
342 0 : }
343 :
344 0 : return;
345 :
346 0 : err:
347 0 : FD_LOG_ERR(( "Usage: fdctl set-identity <keypair> [--require-tower]" ));
348 0 : }
349 :
350 : static void FD_FN_SENSITIVE
351 : set_identity( args_t * args,
352 0 : config_t * config ) {
353 0 : uchar check_public_key[ 32 ];
354 0 : fd_sha512_t sha512[1];
355 0 : FD_TEST( fd_sha512_join( fd_sha512_new( sha512 ) ) );
356 0 : fd_ed25519_public_from_private( check_public_key, args->set_identity.keypair, sha512 );
357 0 : if( FD_UNLIKELY( memcmp( check_public_key, args->set_identity.keypair+32UL, 32UL ) ) )
358 0 : FD_LOG_ERR(( "The public key in the identity key file does not match the public key derived from the private key. "
359 0 : "Firedancer will not use the key pair to sign as it might leak the private key." ));
360 :
361 0 : for( ulong i=0UL; i<config->topo.obj_cnt; i++ ) {
362 0 : fd_topo_obj_t * obj = &config->topo.objs[ i ];
363 0 : if( FD_LIKELY( strcmp( obj->name, "keyswitch" ) ) ) continue;
364 :
365 0 : fd_topo_join_workspace( &config->topo, &config->topo.workspaces[ obj->wksp_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
366 0 : }
367 :
368 0 : int has_error = 0;
369 0 : ulong state = FD_SET_IDENTITY_STATE_UNLOCKED;
370 0 : ulong halted_seq = 0UL;
371 0 : for(;;) {
372 0 : poll_keyswitch( &config->topo, &state, &halted_seq, args->set_identity.keypair, &has_error, args->set_identity.require_tower, args->set_identity.force );
373 0 : if( FD_UNLIKELY( FD_SET_IDENTITY_STATE_UNLOCKED==state ) ) break;
374 0 : }
375 :
376 0 : char identity_key_base58[ FD_BASE58_ENCODED_32_SZ ];
377 0 : fd_base58_encode_32( args->set_identity.keypair+32UL, NULL, identity_key_base58 );
378 0 : identity_key_base58[ FD_BASE58_ENCODED_32_SZ-1UL ] = '\0';
379 :
380 0 : if( FD_UNLIKELY( has_error ) ) FD_LOG_ERR(( "Failed to switch identity key to `%s`, check validator logs for details", identity_key_base58 ));
381 0 : else FD_LOG_NOTICE(( "Validator identity key switched to `%s`", identity_key_base58 ));
382 0 : }
383 :
384 : void
385 : set_identity_cmd_fn( args_t * args,
386 0 : config_t * config ) {
387 0 : set_identity( args, config );
388 0 : }
389 :
390 : action_t fd_action_set_identity = {
391 : .name = "set-identity",
392 : .args = set_identity_cmd_args,
393 : .fn = set_identity_cmd_fn,
394 : .require_config = 1,
395 : .perm = NULL,
396 : .description = "Change the identity of a running validator",
397 : };
|