LCOV - code coverage report
Current view: top level - app/shared/commands - set_identity.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 225 0.0 %
Date: 2025-09-14 04:42:20 Functions: 0 6 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "run/run.h"
       3             : 
       4             : #include "../../platform/fd_cap_chk.h"
       5             : #include "../../../disco/keyguard/fd_keyswitch.h"
       6             : #include "../../../disco/keyguard/fd_keyload.h"
       7             : #include "../../../tango/fd_tango.h"
       8             : #include "../../../util/fd_util.h"
       9             : 
      10             : #include <strings.h>
      11             : #include <unistd.h>
      12             : #include <sys/resource.h>
      13             : 
      14             : /* The process of switching identity of the validator is somewhat
      15             :    involved, to prevent it from producing torn data (for example,
      16             :    a block where half the shreds are signed by one private key, and half
      17             :    are signed by another).
      18             : 
      19             :    The process of switching is a state machine that progresses linearly
      20             :    through each of the states.  The only transitions allowed are direct
      21             :    forward steps, except that in emergency recovery cases an operator
      22             :    can force the state back to unlocked.
      23             : 
      24             :    The states follow, in order. */
      25             : 
      26             : /* State 0: UNLOCKED.
      27             :      The validator is not currently in the process of switching keys. */
      28           0 : #define FD_SET_IDENTITY_STATE_UNLOCKED              (0UL)
      29             : 
      30             : /* State 1: LOCKED
      31             :      Some client to the validator has requested a key switch.  To do so,
      32             :      it acquired an exclusive lock on the validator to prevent the
      33             :      switch potentially being interleaved with another client. */
      34           0 : #define FD_SET_IDENTITY_STATE_LOCKED                (1UL)
      35             : 
      36             : /* State 2: POH_HALT_REQUESTED
      37             :      The first step in the key switch process is to pause the leader
      38             :      pipeline of the validator, preventing us from becoming leader, but
      39             :      finishing any currently in progress leader slot if there is one.
      40             :      While in this state, the validator is waiting for the leader
      41             :      pipeline to confirm that it has paused production, and is no longer
      42             :      leader.
      43             : 
      44             :      This halt request also causes the PoH tile to switch both:
      45             : 
      46             :        (a) The identity key used by the PoH tile itself, used to
      47             :            determine when this validator is leader in the schedule.
      48             : 
      49             :        (b) The key used by the Agave sub-process, if running
      50             :            Frankendancer.  The Agave key is inside a Mutex<> so it is
      51             :            swapped atomically across all consumers. */
      52           0 : #define FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED    (2UL)
      53             : 
      54             : /* State 3: POH_HALTED
      55             :      The PoH tile has confirmed that it has halted the leader pipeline,
      56             :      and the validator is no longer leader.  No more blocks will be
      57             :      produced until it is unhalted.  In addition, the PoH tile has
      58             :      switched both its own identity key and the Agave key. */
      59           0 : #define FD_SET_IDENTITY_STATE_POH_HALTED            (3UL)
      60             : 
      61             : /* State 4: SHRED_FLUSH_REQUESTED
      62             :      Once the leader pipeline is halted, it must be flushed, meaning any
      63             :      in-flight shreds that could potentially need to be signed with the
      64             :      old key are signed and sent to the network.  This doesn't strictly
      65             :      need to happen before other tiles have their key flushed, but it
      66             :      makes the control flow easier to understand if we do this as an
      67             :      explicit step.
      68             : 
      69             :      The shred tile is flushed by telling it the last sequence number
      70             :      the PoH tile has produced for an outgoing shred, at the time it was
      71             :      halted, and then waiting for the shred tile to confirm that it has
      72             :      seen and processed all shreds up to and including that sequence
      73             :      number.
      74             : 
      75             :      In addition to flushing out any in-flight shreds, this also causes
      76             :      the shred tile to switch the identity key it uses internally, for
      77             :      determining where this validator is positioned in the Turbine tree. */
      78           0 : #define FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED (4UL)
      79             : 
      80             : /* State 5: SHRED_FLUSHED
      81             :      The shred tile confirms that it has seen and processed all shreds
      82             :      up to and including the last sequence number produced by the PoH
      83             :      tile at the time it was halted.  The shred tile has also switched
      84             :      its own identity key when it indicates the flush is complete. */
      85           0 : #define FD_SET_IDENTITY_STATE_SHRED_FLUSHED         (5UL)
      86             : 
      87             : /* State 6: ALL_SWITCH_REQUESTED
      88             :      The client now requests that all other tiles which consume the
      89             :      identity key in some way switch to the new key.  The leader
      90             :      pipeline is still halted, although it doesn't strictly need to be,
      91             :      since outgoing shreds have been flushed.  This is done to keep the
      92             :      control flow simpler.
      93             : 
      94             :      The other tiles using the identity key are:
      95             : 
      96             :        (a) Sign.  The sign tile is responsible for holding the private
      97             :            key.
      98             :        (b) GUI.  The GUI shows the validator identity key to the user,
      99             :            and uses the key to determine which blocks are ours for
     100             :            highlighting on the frontend.
     101             :        (c) Event.  Outgoing events to the event server are signed with
     102             :            the identity key to authenticate the sender.
     103             :        (d) Bundle.  The validator must authenticate to any connected
     104             :            bundle server with the identity key to prove it is on the
     105             :            leader schedule.
     106             :        (e) Gossip. The gossip tile sends out ContactInfo messages with
     107             :            our identity key, and also uses the identity key to sign
     108             :            outgoing gossip messages. */
     109           0 : #define FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED  (6UL)
     110             : 
     111             : /* State 7: ALL_SWITCHED
     112             :      All remaining tiles that use the identity key have confirmed that
     113             :      they have switched to the new key.  The validator is now fully
     114             :      switched over. */
     115           0 : #define FD_SET_IDENTITY_STATE_ALL_SWITCHED          (7UL)
     116             : 
     117             : /* State 8: POH_UNHALT_REQUESTED
     118             :      The final state.  Now that all tiles have switched, the leader
     119             :      pipeline can be unhalted and the validator can resume producing
     120             :      blocks.  Once the PoH tile confirms that the leader pipeline is
     121             :      unhalted, the next state is UNLOCKED. */
     122           0 : #define FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED  (8UL)
     123             : /* set_identity_cmd_perm passes the RLIMIT_MEMLOCK requirement of the
                     :    set-identity command to chk through fd_cap_chk_raise_rlimit.  The
                     :    requested limit is 5 normal pages (5UL*FD_SHMEM_NORMAL_PAGE_SZ).
                     :    args and config are unused.
                     :
                     :    NOTE(review): 5 pages presumably matches the protected key pages
                     :    allocated with fd_keyload_alloc_protected_pages( 1UL, 2UL ), i.e.
                     :    one data page plus two guard pages on each side -- confirm against
                     :    fd_keyload. */
     124             : void
     125             : set_identity_cmd_perm( args_t *         args   FD_PARAM_UNUSED,
     126             :                        fd_cap_chk_t *   chk,
     127           0 :                        config_t const * config FD_PARAM_UNUSED ) {
     128             :   /* 5 normal pages for the key storage area (the size below uses FD_SHMEM_NORMAL_PAGE_SZ, not a huge page size) */
     129           0 :   ulong mlock_limit = 5UL * FD_SHMEM_NORMAL_PAGE_SZ;
     130           0 :   fd_cap_chk_raise_rlimit( chk, "set-identity", RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" );
     131           0 : }
     132             : /* find_keyswitch returns the local address of the keyswitch object
                     :    belonging to the tile found by
                     :    fd_topo_find_tile( topo, tile_name, 0UL ).  It never returns NULL:
                     :    FD_TEST fails if no such tile exists, if the tile has no keyswitch
                     :    object (keyswitch_obj_id==ULONG_MAX), or if the object address
                     :    resolves to NULL. */
     133             : static fd_keyswitch_t *
     134             : find_keyswitch( fd_topo_t const * topo,
     135           0 :                 char const *      tile_name ) {
     136           0 :   ulong tile_idx = fd_topo_find_tile( topo, tile_name, 0UL );
     137           0 :   FD_TEST( tile_idx!=ULONG_MAX );                                   /* tile must exist */
     138           0 :   FD_TEST( topo->tiles[ tile_idx ].keyswitch_obj_id!=ULONG_MAX );   /* tile must own a keyswitch */
     139             : 
     140           0 :   fd_keyswitch_t * keyswitch = fd_topo_obj_laddr( topo, topo->tiles[ tile_idx ].keyswitch_obj_id );
     141           0 :   FD_TEST( keyswitch );
     142           0 :   return keyswitch;
     143           0 : }
     144             : /* poll_keyswitch performs one polling step of the identity switch
                     :    state machine described at the top of this file.  *state holds the
                     :    current FD_SET_IDENTITY_STATE_* value and is advanced in place when
                     :    the step completes.  When a tile has not yet acknowledged a
                     :    request, the function spin-pauses and leaves *state unchanged so
                     :    the caller can simply call it again.
                     :
                     :      topo          topology whose tile keyswitch objects are driven
                     :      state         in/out, current state of the state machine
                     :      halted_seq    in/out, loaded from the poh keyswitch result when
                     :                    the poh switch completes, then handed to every
                     :                    shred tile as the sequence number to flush up to
                     :      keypair       64 bytes, private key in [0,32) and public key in
                     :                    [32,64).  The private half is zeroed here once it
                     :                    has been copied to the sign tile keyswitch.
                     :      has_error     set to 1 if the poh tile reports that the switch
                     :                    failed; the machine then skips straight to
                     :                    unhalting the leader pipeline
                     :      require_tower handed to the poh tile as a 0/1 parameter
                     :      force_lock    if non-zero, proceed even though the poh keyswitch
                     :                    was not in the unlocked state
                     :
                     :    Wherever a tile's keyswitch state selects between several
                     :    branches it is read exactly once with FD_VOLATILE_CONST.  Reading
                     :    the shared field again for each comparison would let a tile that
                     :    moves from pending to completed between two reads fall through to
                     :    the "unexpected state" error.  Unexpected states are reported with
                     :    FD_LOG_ERR. */
     145             : static void FD_FN_SENSITIVE
     146             : poll_keyswitch( fd_topo_t * topo,
     147             :                 ulong *     state,
     148             :                 ulong *     halted_seq,
     149             :                 uchar *     keypair,
     150             :                 int *       has_error,
     151             :                 int         require_tower,
     152           0 :                 int         force_lock ) {
     153           0 :   switch( *state ) {
     154           0 :     case FD_SET_IDENTITY_STATE_UNLOCKED: {
     155           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     156           0 :       if( FD_LIKELY( FD_KEYSWITCH_STATE_UNLOCKED==FD_ATOMIC_CAS( &poh->state, FD_KEYSWITCH_STATE_UNLOCKED, FD_KEYSWITCH_STATE_LOCKED ) ) ) {
     157           0 :         *state = FD_SET_IDENTITY_STATE_LOCKED;
     158           0 :         FD_LOG_INFO(( "Locking validator identity for key switch..." ));
     159           0 :       } else {
     160           0 :         if( FD_UNLIKELY( force_lock ) ) {
     161           0 :           *state = FD_SET_IDENTITY_STATE_LOCKED;
     162           0 :           FD_LOG_WARNING(( "Another process was changing keys, but `--force` supplied. Forcing lock on validator identity for key switch..." ));
     163           0 :         } else {
     164           0 :           FD_LOG_ERR(( "Cannot set-identity because Firedancer is already in the process of switching keys. If you are not currently "
     165           0 :                        "changing the identity, it might be because an identity change was abandoned. To recover, run the `set-identity` "
     166           0 :                        "command again with the `--force` argument." ));
     167           0 :         }
     168           0 :       }
     169           0 :       break;
     170           0 :     }
     171           0 :     case FD_SET_IDENTITY_STATE_LOCKED: {
     172           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     173           0 :       memcpy( poh->bytes, keypair, 64UL );
     174           0 :       poh->param = !!require_tower;
     175           0 :       FD_COMPILER_MFENCE();
     176           0 :       poh->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     177           0 :       FD_COMPILER_MFENCE();
     178           0 :       *state = FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED;
     179           0 :       FD_LOG_INFO(( "Pausing leader pipeline for key switch..." ));
     180           0 :       break;
     181           0 :     }
     182           0 :     case FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED: {
     183           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
                     :       /* Snapshot the shared state so every comparison below sees the same value. */
                     :       ulong poh_state = FD_VOLATILE_CONST( poh->state );
     184           0 :       if( FD_LIKELY( poh_state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     185           0 :         explicit_bzero( poh->bytes, 64UL );
     186           0 :         FD_COMPILER_MFENCE();
     187           0 :         *halted_seq = poh->result;
     188           0 :         *state = FD_SET_IDENTITY_STATE_POH_HALTED;
     189           0 :         FD_LOG_INFO(( "Leader pipeline successfully paused..." ));
     190           0 :       } else if( FD_UNLIKELY( poh_state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
     191           0 :         FD_SPIN_PAUSE();
     192           0 :       } else if( FD_UNLIKELY( poh_state==FD_KEYSWITCH_STATE_FAILED ) ) {
     193             :         /* Failed to switch identity in Agave, so abort the entire process.
                     :            Scrub the keypair copy from the shared keyswitch exactly as
                     :            on the success path, so the key does not linger in shared
                     :            memory after a failed switch. */
                     :         explicit_bzero( poh->bytes, 64UL );
                     :         FD_COMPILER_MFENCE();
     194           0 :         *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
     195           0 :         *has_error = 1;
     196           0 :       } else {
     197           0 :         FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh_state ));
     198           0 :       }
     199           0 :       break;
     200           0 :     }
     201           0 :     case FD_SET_IDENTITY_STATE_POH_HALTED: {
     202           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     203           0 :         fd_topo_tile_t const * tile = &topo->tiles[ i ];
     204           0 :         if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
     205             : 
     206           0 :         fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
     207           0 :         FD_TEST( shred );
     208             : 
     209           0 :         shred->param = *halted_seq;
     210           0 :         memcpy( shred->bytes, keypair+32UL, 32UL );
     211           0 :         FD_COMPILER_MFENCE();
     212           0 :         shred->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     213           0 :         FD_COMPILER_MFENCE();
     214           0 :         FD_LOG_INFO(( "Flushing in-flight unpublished shreds, must reach seq %lu...", *halted_seq ));
     215           0 :       }
     216             : 
     217           0 :       *state = FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED;
     218           0 :       break;
     219           0 :     }
     220           0 :     case FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED: {
     221           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     222           0 :         fd_topo_tile_t const * tile = &topo->tiles[ i ];
     223           0 :         if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
     224             : 
     225           0 :         fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
     226           0 :         FD_TEST( shred );
     227             : 
                     :         /* Single read, see the note in the function header. */
                     :         ulong shred_state = FD_VOLATILE_CONST( shred->state );
     228           0 :         if( FD_LIKELY( shred_state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     229           0 :           continue;
     230           0 :         } else if( FD_UNLIKELY( shred_state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
     231             :           /* If any of the shred tiles is still pending, we need to wait. */
     232           0 :           FD_SPIN_PAUSE();
     233           0 :           return;
     234           0 :         } else {
     235           0 :           FD_LOG_ERR(( "Unexpected shred:%lu keyswitch state %lu", tile->kind_id, shred_state ));
     236           0 :         }
     237           0 :       }
     238             : 
     239           0 :       *state = FD_SET_IDENTITY_STATE_SHRED_FLUSHED;
     240           0 :       FD_LOG_INFO(( "All in-flight shreds published..." ));
     241           0 :       break;
     242           0 :     }
     243           0 :     case FD_SET_IDENTITY_STATE_SHRED_FLUSHED: {
     244           0 :       fd_keyswitch_t * sign = find_keyswitch( topo, "sign" );
     245           0 :       memcpy( sign->bytes, keypair, 64UL );
     246           0 :       FD_COMPILER_MFENCE();
     247           0 :       explicit_bzero( keypair, 32UL ); /* Private key no longer needed in this process */
     248           0 :       FD_COMPILER_MFENCE();
     249           0 :       sign->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     250           0 :       FD_COMPILER_MFENCE();
     251             : 
     252           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     253           0 :         if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
     254           0 :         if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ||
     255           0 :                        !strcmp( topo->tiles[ i ].name, "poh" ) ||
     256           0 :                        !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
     257             : 
     258           0 :         fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
     259           0 :         memcpy( tile_ks->bytes, keypair+32UL, 32UL );
     260           0 :         FD_COMPILER_MFENCE();
     261           0 :         tile_ks->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     262           0 :         FD_COMPILER_MFENCE();
     263           0 :       }
     264             : 
     265           0 :       FD_LOG_INFO(( "Requesting all tiles switch identity key..." ));
     266           0 :       *state = FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED;
     267           0 :       break;
     268           0 :     }
     269           0 :     case FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED: {
     270           0 :       ulong all_switched = 1UL;
     271           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     272           0 :         if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
     273           0 :         if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "poh" ) ||
     274           0 :                        !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
     275             : 
     276           0 :         fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
                     :         ulong tile_state = FD_VOLATILE_CONST( tile_ks->state );
     277           0 :         if( FD_LIKELY( tile_state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
     278           0 :           all_switched = 0UL;
     279           0 :           break;
     280           0 :         } else if( FD_UNLIKELY( tile_state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     281           0 :           if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ) ) {
     282           0 :             FD_COMPILER_MFENCE();
     283           0 :             explicit_bzero( tile_ks->bytes, 64UL );
     284           0 :             FD_COMPILER_MFENCE();
     285           0 :           }
     286           0 :           continue;
     287           0 :         } else {
     288           0 :           FD_LOG_ERR(( "Unexpected %s keyswitch state %lu", topo->tiles[ i ].name, tile_state ));
     289           0 :         }
     290           0 :       }
     291             : 
     292           0 :       if( FD_LIKELY( all_switched ) ) {
     293           0 :         FD_LOG_INFO(( "All tiles successfully switched identity key..." ));
     294           0 :         *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
     295           0 :       } else {
     296           0 :         FD_SPIN_PAUSE();
     297           0 :       }
     298           0 :       break;
     299           0 :     }
     300           0 :     case FD_SET_IDENTITY_STATE_ALL_SWITCHED: {
     301           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     302           0 :       poh->state = FD_KEYSWITCH_STATE_UNHALT_PENDING;
     303           0 :       FD_LOG_INFO(( "Requesting to unpause leader pipeline..." ));
     304           0 :       *state = FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED;
     305           0 :       break;
     306           0 :     }
     307           0 :     case FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED: {
     308           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
                     :       ulong poh_state = FD_VOLATILE_CONST( poh->state );
     309           0 :       if( FD_LIKELY( poh_state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     310           0 :         FD_LOG_INFO(( "Leader pipeline unpaused..." ));
     311           0 :         poh->state = FD_KEYSWITCH_STATE_UNLOCKED;
     312           0 :         *state = FD_SET_IDENTITY_STATE_UNLOCKED;
     313           0 :       } else if( FD_UNLIKELY( poh_state==FD_KEYSWITCH_STATE_UNHALT_PENDING ) ) {
     314           0 :         FD_SPIN_PAUSE();
     315           0 :       } else {
     316           0 :         FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh_state ));
     317           0 :       }
     318           0 :       break;
     319           0 :     }
     320           0 :   }
     321           0 : }
     322             : /* set_identity_cmd_args parses the set-identity command line into
                     :    args->set_identity.  The optional flags --require-tower and --force
                     :    are stripped from the command line wherever they appear and stored
                     :    in require_tower and force.  The next remaining argument is
                     :    consumed as the keypair path.  A path of "-" reads the keypair
                     :    from stdin into freshly allocated protected pages; any other path
                     :    is loaded with fd_keyload_load.  If no path argument remains, the
                     :    usage message is reported with FD_LOG_ERR. */
     323             : void
     324             : set_identity_cmd_args( int *    pargc,
     325             :                        char *** pargv,
     326           0 :                        args_t * args) {
     327           0 :   args->set_identity.require_tower = fd_env_strip_cmdline_contains( pargc, pargv, "--require-tower" );
     328           0 :   args->set_identity.force         = fd_env_strip_cmdline_contains( pargc, pargv, "--force" );
     329             : 
     330           0 :   if( FD_UNLIKELY( *pargc<1 ) ) goto err;
     331             : 
     332           0 :   char const * path = (*pargv)[0]; /* first remaining argument */
     333           0 :   (*pargc)--;
     334           0 :   (*pargv)++;
     335             : 
     336           0 :   if( FD_UNLIKELY( !strcmp( path, "-" ) ) ) {
     337           0 :     args->set_identity.keypair = fd_keyload_alloc_protected_pages( 1UL, 2UL );
     338           0 :     FD_LOG_STDOUT(( "Reading identity keypair from stdin.  Press Ctrl-D when done.\n" ));
     339           0 :     fd_keyload_read( STDIN_FILENO, "stdin", args->set_identity.keypair );
     340           0 :   } else {
     341           0 :     args->set_identity.keypair = fd_keyload_load( path, 0 );
     342           0 :   }
     343             : 
     344           0 :   return;
     345             : 
     346           0 : err:
     347           0 :   FD_LOG_ERR(( "Usage: fdctl set-identity <keypair> [--require-tower] [--force]" ));
     348           0 : }
     349             : /* set_identity validates the keypair in args->set_identity and then
                     :    drives the identity switch to completion.
                     :
                     :    It first derives the public key from the private key at the start
                     :    of keypair and reports an FD_LOG_ERR if it differs from the 32
                     :    bytes stored at keypair+32.  It then joins, read-write, the
                     :    workspace of every topology object named "keyswitch", and calls
                     :    poll_keyswitch repeatedly, starting from UNLOCKED, until the state
                     :    is UNLOCKED again.  Finally it logs the base58 encoding of the
                     :    public key: with FD_LOG_ERR if poll_keyswitch set has_error, with
                     :    FD_LOG_NOTICE otherwise. */
     350             : static void FD_FN_SENSITIVE
     351             : set_identity( args_t *   args,
     352           0 :               config_t * config ) {
     353           0 :   uchar check_public_key[ 32 ];
     354           0 :   fd_sha512_t sha512[1];
     355           0 :   FD_TEST( fd_sha512_join( fd_sha512_new( sha512 ) ) );
     356           0 :   fd_ed25519_public_from_private( check_public_key, args->set_identity.keypair, sha512 );
     357           0 :   if( FD_UNLIKELY( memcmp( check_public_key, args->set_identity.keypair+32UL, 32UL ) ) )
     358           0 :     FD_LOG_ERR(( "The public key in the identity key file does not match the public key derived from the private key. "
     359           0 :                  "Firedancer will not use the key pair to sign as it might leak the private key." ));
     360             :   /* Join, read-write, the workspace of every topology object named "keyswitch". */
     361           0 :   for( ulong i=0UL; i<config->topo.obj_cnt; i++ ) {
     362           0 :     fd_topo_obj_t * obj = &config->topo.objs[ i ];
     363           0 :     if( FD_LIKELY( strcmp( obj->name, "keyswitch" ) ) ) continue;
     364             : 
     365           0 :     fd_topo_join_workspace( &config->topo, &config->topo.workspaces[ obj->wksp_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
     366           0 :   }
     367             :   /* Step the state machine from UNLOCKED until it arrives back at UNLOCKED. */
     368           0 :   int has_error = 0;
     369           0 :   ulong state = FD_SET_IDENTITY_STATE_UNLOCKED;
     370           0 :   ulong halted_seq = 0UL;
     371           0 :   for(;;) {
     372           0 :     poll_keyswitch( &config->topo, &state, &halted_seq, args->set_identity.keypair, &has_error, args->set_identity.require_tower, args->set_identity.force );
     373           0 :     if( FD_UNLIKELY( FD_SET_IDENTITY_STATE_UNLOCKED==state ) ) break;
     374           0 :   }
     375             :   /* keypair+32 is the public half of the keypair; encode it for the final log line. */
     376           0 :   char identity_key_base58[ FD_BASE58_ENCODED_32_SZ ];
     377           0 :   fd_base58_encode_32( args->set_identity.keypair+32UL, NULL, identity_key_base58 );
     378           0 :   identity_key_base58[ FD_BASE58_ENCODED_32_SZ-1UL ] = '\0';
     379             : 
     380           0 :   if( FD_UNLIKELY( has_error ) ) FD_LOG_ERR(( "Failed to switch identity key to `%s`, check validator logs for details", identity_key_base58 ));
     381           0 :   else                           FD_LOG_NOTICE(( "Validator identity key switched to `%s`", identity_key_base58 ));
     382           0 : }
     383             : /* set_identity_cmd_fn is the .fn handler of fd_action_set_identity; it forwards args and config to set_identity. */
     384             : void
     385             : set_identity_cmd_fn( args_t *   args,
     386           0 :                      config_t * config ) {
     387           0 :   set_identity( args, config );
     388           0 : }
     389             : /* fd_action_set_identity is the action descriptor for the
                     :    "set-identity" command: set_identity_cmd_args parses its arguments
                     :    and set_identity_cmd_fn executes it.
                     :
                     :    NOTE(review): .perm is NULL although set_identity_cmd_perm is
                     :    defined in this file -- confirm whether the permission check is
                     :    meant to be wired in here or is registered elsewhere. */
     390             : action_t fd_action_set_identity = {
     391             :   .name           = "set-identity",
     392             :   .args           = set_identity_cmd_args,
     393             :   .fn             = set_identity_cmd_fn,
     394             :   .require_config = 1,
     395             :   .perm           = NULL,
     396             :   .description    = "Change the identity of a running validator",
     397             : };

Generated by: LCOV version 1.14