Line data Source code
1 : #define _GNU_SOURCE
2 : #include "run/run.h"
3 :
4 : #include "../fd_cap_chk.h"
5 : #include "../../../disco/keyguard/fd_keyswitch.h"
6 : #include "../../../disco/keyguard/fd_keyload.h"
7 : #include "../../../tango/fd_tango.h"
8 : #include "../../../util/fd_util.h"
9 :
10 : #include <strings.h>
11 : #include <unistd.h>
12 : #include <sys/resource.h>
13 :
/* Switching the validator identity is a multi-step procedure.  It has
   to be, otherwise the validator could emit torn data, for example a
   block in which some shreds are signed by the old private key and the
   rest by the new one.

   The procedure is modelled as a state machine which normally advances
   one state at a time, in the order the states are listed below.  Two
   exceptions exist:

     - An operator can pass `--force` to take over the lock when a
       previous switch was abandoned part way through.

     - If the PoH tile reports that the switch failed, the machine
       skips straight to ALL_SWITCHED so that the leader pipeline is
       unhalted again, and the command reports an error. */

/* State 0: UNLOCKED
   No key switch is in progress. */
#define FD_SET_IDENTITY_STATE_UNLOCKED (0UL)

/* State 1: LOCKED
   A client has asked for a key switch and holds the exclusive lock on
   the validator, so that its switch cannot be interleaved with one
   driven by another client. */
#define FD_SET_IDENTITY_STATE_LOCKED (1UL)

/* State 2: POH_HALT_REQUESTED
   The leader pipeline has been asked to pause.  Pausing prevents the
   validator from becoming leader, but any leader slot that is already
   in progress is finished first.  The client stays in this state until
   the leader pipeline confirms that production has stopped and the
   validator is no longer leader.

   As part of handling the halt request the PoH tile also switches:

     (a) Its own identity key, which it uses to work out when this
         validator is leader in the schedule.

     (b) The key used by the Agave sub-process, if running
         Frankendancer.  The Agave key lives inside a Mutex<>, so it
         is swapped atomically for all of its consumers. */
#define FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED (2UL)

/* State 3: POH_HALTED
   The PoH tile confirmed that the leader pipeline is halted and that
   the validator is not leader.  No blocks are produced until the
   pipeline is unhalted.  By this point the PoH tile has switched both
   its own identity key and the Agave key. */
#define FD_SET_IDENTITY_STATE_POH_HALTED (3UL)

/* State 4: SHRED_FLUSH_REQUESTED
   With the leader pipeline halted, it now has to be flushed:  every
   in-flight shred that might still need a signature from the old key
   must be signed and sent to the network.  Strictly speaking this
   need not complete before other tiles switch keys, but doing it as
   its own explicit step keeps the control flow easy to follow.

   Flushing works by handing the shred tile the sequence number of the
   last outgoing shred the PoH tile produced before it halted, and
   then waiting until the shred tile confirms it has seen and
   processed every shred up to and including that sequence number.

   The same request makes the shred tile switch the identity key it
   uses internally to determine this validator's position in the
   Turbine tree. */
#define FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED (4UL)

/* State 5: SHRED_FLUSHED
   The shred tile confirmed it has seen and processed every shred up
   to and including the last sequence number the PoH tile produced
   before halting.  When it signals that the flush is complete it has
   also switched its own identity key. */
#define FD_SET_IDENTITY_STATE_SHRED_FLUSHED (5UL)

/* State 6: ALL_SWITCH_REQUESTED
   Every remaining tile that consumes the identity key is asked to
   move to the new key.  The leader pipeline is kept halted while this
   happens.  That is not strictly required, since outgoing shreds
   have already been flushed, but it keeps the control flow simple.

   The remaining consumers of the identity key are:

     (a) Sign.  The sign tile is the one holding the private key.
     (b) GUI.  The GUI displays the validator identity key, and uses
         it to decide which blocks are ours when highlighting them on
         the frontend.
     (c) Event.  Events sent to the event server are signed with the
         identity key to authenticate the sender.
     (d) Bundle.  The validator authenticates to any connected bundle
         server with the identity key to prove it is on the leader
         schedule. */
#define FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED (6UL)

/* State 7: ALL_SWITCHED
   Every remaining tile using the identity key confirmed the switch.
   The validator is now fully running on the new key. */
#define FD_SET_IDENTITY_STATE_ALL_SWITCHED (7UL)

/* State 8: POH_UNHALT_REQUESTED
   The last state.  All tiles have switched, so the leader pipeline is
   asked to resume and the validator can produce blocks again.  Once
   the PoH tile confirms the pipeline is running, the next state is
   UNLOCKED. */
#define FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED (8UL)
120 :
121 : void
122 : set_identity_cmd_perm( args_t * args FD_PARAM_UNUSED,
123 : fd_cap_chk_t * chk,
124 0 : config_t const * config FD_PARAM_UNUSED ) {
125 : /* 5 huge pages for the key storage area */
126 0 : ulong mlock_limit = 5UL * FD_SHMEM_NORMAL_PAGE_SZ;
127 0 : fd_cap_chk_raise_rlimit( chk, "set-identity", RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" );
128 0 : }
129 :
130 : fd_keyswitch_t *
131 : find_keyswitch( fd_topo_t const * topo,
132 0 : char const * tile_name ) {
133 0 : ulong tile_idx = fd_topo_find_tile( topo, tile_name, 0UL );
134 0 : FD_TEST( tile_idx!=ULONG_MAX );
135 0 : FD_TEST( topo->tiles[ tile_idx ].keyswitch_obj_id!=ULONG_MAX );
136 :
137 0 : fd_keyswitch_t * keyswitch = fd_topo_obj_laddr( topo, topo->tiles[ tile_idx ].keyswitch_obj_id );
138 0 : FD_TEST( keyswitch );
139 0 : return keyswitch;
140 0 : }
141 :
142 : static void FD_FN_SENSITIVE
143 : poll_keyswitch( fd_topo_t * topo,
144 : ulong * state,
145 : ulong * halted_seq,
146 : uchar * keypair,
147 : int * has_error,
148 : int require_tower,
149 0 : int force_lock ) {
150 0 : switch( *state ) {
151 0 : case FD_SET_IDENTITY_STATE_UNLOCKED: {
152 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
153 0 : if( FD_LIKELY( FD_KEYSWITCH_STATE_UNLOCKED==FD_ATOMIC_CAS( &poh->state, FD_KEYSWITCH_STATE_UNLOCKED, FD_KEYSWITCH_STATE_LOCKED ) ) ) {
154 0 : *state = FD_SET_IDENTITY_STATE_LOCKED;
155 0 : FD_LOG_INFO(( "Locking validator identity for key switch..." ));
156 0 : } else {
157 0 : if( FD_UNLIKELY( force_lock ) ) {
158 0 : *state = FD_SET_IDENTITY_STATE_LOCKED;
159 0 : FD_LOG_WARNING(( "Another process was changing keys, but `--force` supplied. Forcing lock on validator identity for key switch..." ));
160 0 : } else {
161 0 : FD_LOG_ERR(( "Cannot set-identity because Firedancer is already in the process of switching keys. If you are not currently "
162 0 : "changing the identity, it might be because an identity change was abandoned. To recover, run the `set-identity` "
163 0 : "command again with the `--force` argument." ));
164 0 : }
165 0 : }
166 0 : break;
167 0 : }
168 0 : case FD_SET_IDENTITY_STATE_LOCKED: {
169 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
170 0 : memcpy( poh->bytes, keypair, 64UL );
171 0 : poh->param = !!require_tower;
172 0 : FD_COMPILER_MFENCE();
173 0 : poh->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
174 0 : FD_COMPILER_MFENCE();
175 0 : *state = FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED;
176 0 : FD_LOG_INFO(( "Pausing leader pipeline for key switch..." ));
177 0 : break;
178 0 : }
179 0 : case FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED: {
180 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
181 0 : if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
182 0 : explicit_bzero( poh->bytes, 64UL );
183 0 : FD_COMPILER_MFENCE();
184 0 : *halted_seq = poh->result;
185 0 : *state = FD_SET_IDENTITY_STATE_POH_HALTED;
186 0 : FD_LOG_INFO(( "Leader pipeline successfully paused..." ));
187 0 : } else if( FD_UNLIKELY( poh->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
188 0 : FD_SPIN_PAUSE();
189 0 : } else if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_FAILED ) ) {
190 : /* Failed to switch identity in Agave, so abort the entire process. */
191 0 : *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
192 0 : *has_error = 1;
193 0 : } else {
194 0 : FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh->state ));
195 0 : }
196 0 : break;
197 0 : }
198 0 : case FD_SET_IDENTITY_STATE_POH_HALTED: {
199 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
200 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
201 0 : if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
202 :
203 0 : fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
204 0 : FD_TEST( shred );
205 :
206 0 : shred->param = *halted_seq;
207 0 : memcpy( shred->bytes, keypair+32UL, 32UL );
208 0 : FD_COMPILER_MFENCE();
209 0 : shred->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
210 0 : FD_COMPILER_MFENCE();
211 0 : FD_LOG_INFO(( "Flushing in-flight unpublished shreds, must reach seq %lu...", *halted_seq ));
212 0 : }
213 :
214 0 : *state = FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED;
215 0 : break;
216 0 : }
217 0 : case FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED: {
218 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
219 0 : fd_topo_tile_t const * tile = &topo->tiles[ i ];
220 0 : if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
221 :
222 0 : fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
223 0 : FD_TEST( shred );
224 :
225 0 : if( FD_LIKELY( shred->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
226 0 : continue;
227 0 : } else if( FD_UNLIKELY( shred->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
228 : /* If any of the shred tiles is still pending, we need to wait. */
229 0 : FD_SPIN_PAUSE();
230 0 : return;
231 0 : } else {
232 0 : FD_LOG_ERR(( "Unexpected shred:%lu keyswitch state %lu", tile->kind_id, shred->state ));
233 0 : }
234 0 : }
235 :
236 0 : *state = FD_SET_IDENTITY_STATE_SHRED_FLUSHED;
237 0 : FD_LOG_INFO(( "All in-flight shreds published..." ));
238 0 : break;
239 0 : }
240 0 : case FD_SET_IDENTITY_STATE_SHRED_FLUSHED: {
241 0 : fd_keyswitch_t * sign = find_keyswitch( topo, "sign" );
242 0 : memcpy( sign->bytes, keypair, 64UL );
243 0 : FD_COMPILER_MFENCE();
244 0 : explicit_bzero( keypair, 32UL ); /* Private key no longer needed in this process */
245 0 : FD_COMPILER_MFENCE();
246 0 : sign->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
247 0 : FD_COMPILER_MFENCE();
248 :
249 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
250 0 : if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
251 0 : if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ||
252 0 : !strcmp( topo->tiles[ i ].name, "poh" ) ||
253 0 : !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
254 :
255 0 : fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
256 0 : memcpy( tile_ks->bytes, keypair+32UL, 32UL );
257 0 : FD_COMPILER_MFENCE();
258 0 : tile_ks->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
259 0 : FD_COMPILER_MFENCE();
260 0 : }
261 :
262 0 : FD_LOG_INFO(( "Requesting all tiles switch identity key..." ));
263 0 : *state = FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED;
264 0 : break;
265 0 : }
266 0 : case FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED: {
267 0 : ulong all_switched = 1UL;
268 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
269 0 : if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
270 0 : if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "poh" ) ||
271 0 : !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
272 :
273 0 : fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
274 0 : if( FD_LIKELY( tile_ks->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
275 0 : all_switched = 0UL;
276 0 : break;
277 0 : } else if( FD_UNLIKELY( tile_ks->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
278 0 : if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ) ) {
279 0 : FD_COMPILER_MFENCE();
280 0 : explicit_bzero( tile_ks->bytes, 64UL );
281 0 : FD_COMPILER_MFENCE();
282 0 : }
283 0 : continue;
284 0 : } else {
285 0 : FD_LOG_ERR(( "Unexpected %s keyswitch state %lu", topo->tiles[ i ].name, tile_ks->state ));
286 0 : }
287 0 : }
288 :
289 0 : if( FD_LIKELY( all_switched ) ) {
290 0 : FD_LOG_INFO(( "All tiles successfully switched identity key..." ));
291 0 : *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
292 0 : } else {
293 0 : FD_SPIN_PAUSE();
294 0 : }
295 0 : break;
296 0 : }
297 0 : case FD_SET_IDENTITY_STATE_ALL_SWITCHED: {
298 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
299 0 : poh->state = FD_KEYSWITCH_STATE_UNHALT_PENDING;
300 0 : FD_LOG_INFO(( "Requesting to unpause leader pipeline..." ));
301 0 : *state = FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED;
302 0 : break;
303 0 : }
304 0 : case FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED: {
305 0 : fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
306 0 : if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
307 0 : FD_LOG_INFO(( "Leader pipeline unpaused..." ));
308 0 : poh->state = FD_KEYSWITCH_STATE_UNLOCKED;
309 0 : *state = FD_SET_IDENTITY_STATE_UNLOCKED;
310 0 : } else if( FD_UNLIKELY( poh->state==FD_KEYSWITCH_STATE_UNHALT_PENDING ) ) {
311 0 : FD_SPIN_PAUSE();
312 0 : } else {
313 0 : FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh->state ));
314 0 : }
315 0 : break;
316 0 : }
317 0 : }
318 0 : }
319 :
320 : void
321 : set_identity_cmd_args( int * pargc,
322 : char *** pargv,
323 0 : args_t * args) {
324 0 : args->set_identity.require_tower = fd_env_strip_cmdline_contains( pargc, pargv, "--require-tower" );
325 0 : args->set_identity.force = fd_env_strip_cmdline_contains( pargc, pargv, "--force" );
326 :
327 0 : if( FD_UNLIKELY( *pargc<1 ) ) goto err;
328 :
329 0 : char const * path = *pargv[0];
330 0 : (*pargc)--;
331 0 : (*pargv)++;
332 :
333 0 : if( FD_UNLIKELY( !strcmp( path, "-" ) ) ) {
334 0 : args->set_identity.keypair = fd_keyload_alloc_protected_pages( 1UL, 2UL );
335 0 : FD_LOG_STDOUT(( "Reading identity keypair from stdin. Press Ctrl-D when done.\n" ));
336 0 : fd_keyload_read( STDIN_FILENO, "stdin", args->set_identity.keypair );
337 0 : } else {
338 0 : args->set_identity.keypair = fd_keyload_load( path, 0 );
339 0 : }
340 :
341 0 : return;
342 :
343 0 : err:
344 0 : FD_LOG_ERR(( "Usage: fdctl set-identity <keypair> [--require-tower]" ));
345 0 : }
346 :
347 : static void FD_FN_SENSITIVE
348 : set_identity( args_t * args,
349 0 : config_t * config ) {
350 0 : uchar check_public_key[ 32 ];
351 0 : fd_sha512_t sha512[1];
352 0 : FD_TEST( fd_sha512_join( fd_sha512_new( sha512 ) ) );
353 0 : fd_ed25519_public_from_private( check_public_key, args->set_identity.keypair, sha512 );
354 0 : if( FD_UNLIKELY( memcmp( check_public_key, args->set_identity.keypair+32UL, 32UL ) ) )
355 0 : FD_LOG_ERR(( "The public key in the identity key file does not match the public key derived from the private key. "
356 0 : "Firedancer will not use the key pair to sign as it might leak the private key." ));
357 :
358 0 : for( ulong i=0UL; i<config->topo.obj_cnt; i++ ) {
359 0 : fd_topo_obj_t * obj = &config->topo.objs[ i ];
360 0 : if( FD_LIKELY( strcmp( obj->name, "keyswitch" ) ) ) continue;
361 :
362 0 : fd_topo_join_workspace( &config->topo, &config->topo.workspaces[ obj->wksp_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
363 0 : }
364 :
365 0 : int has_error = 0;
366 0 : ulong state = FD_SET_IDENTITY_STATE_UNLOCKED;
367 0 : ulong halted_seq = 0UL;
368 0 : for(;;) {
369 0 : poll_keyswitch( &config->topo, &state, &halted_seq, args->set_identity.keypair, &has_error, args->set_identity.require_tower, args->set_identity.force );
370 0 : if( FD_UNLIKELY( FD_SET_IDENTITY_STATE_UNLOCKED==state ) ) break;
371 0 : }
372 :
373 0 : char identity_key_base58[ FD_BASE58_ENCODED_32_SZ ];
374 0 : fd_base58_encode_32( args->set_identity.keypair+32UL, NULL, identity_key_base58 );
375 0 : identity_key_base58[ FD_BASE58_ENCODED_32_SZ-1UL ] = '\0';
376 :
377 0 : if( FD_UNLIKELY( has_error ) ) FD_LOG_ERR(( "Failed to switch identity key to `%s`, check validator logs for details", identity_key_base58 ));
378 0 : else FD_LOG_NOTICE(( "Validator identity key switched to `%s`", identity_key_base58 ));
379 0 : }
380 :
381 : void
382 : set_identity_cmd_fn( args_t * args,
383 0 : config_t * config ) {
384 0 : set_identity( args, config );
385 0 : }
|