LCOV - code coverage report
Current view: top level - app/shared/commands - set_identity.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 225 0.0 %
Date: 2025-03-20 12:08:36 Functions: 0 6 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "run/run.h"
       3             : 
       4             : #include "../fd_cap_chk.h"
       5             : #include "../../../disco/keyguard/fd_keyswitch.h"
       6             : #include "../../../disco/keyguard/fd_keyload.h"
       7             : #include "../../../tango/fd_tango.h"
       8             : #include "../../../util/fd_util.h"
       9             : 
      10             : #include <strings.h>
      11             : #include <unistd.h>
      12             : #include <sys/resource.h>
      13             : 
/* The process of switching identity of the validator is somewhat
   involved, to prevent it from producing torn data (for example,
   a block where half the shreds are signed by one private key, and half
   are signed by another).

   The process of switching is a state machine that progresses linearly
   through each of the states.  Generally, the only transitions allowed
   are direct forward steps.  The one exception is emergency recovery,
   where an operator can force the state back to unlocked.

   The states follow, in order. */

/* State 0: UNLOCKED.
     The validator is not currently in the process of switching keys. */
#define FD_SET_IDENTITY_STATE_UNLOCKED              (0UL)

/* State 1: LOCKED
     Some client to the validator has requested a key switch.  To do so,
     it acquired an exclusive lock on the validator (a compare-and-swap
     of the PoH keyswitch from unlocked to locked) to prevent the
     switch potentially being interleaved with another client. */
#define FD_SET_IDENTITY_STATE_LOCKED                (1UL)

/* State 2: POH_HALT_REQUESTED
     The first step in the key switch process is to pause the leader
     pipeline of the validator, preventing us from becoming leader, but
     finishing any currently in progress leader slot if there is one.
     While in this state, the validator is waiting for the leader
     pipeline to confirm that it has paused production, and is no longer
     leader.

     This halt request also causes the PoH tile to switch both:

       (a) The identity key used by the PoH tile itself, used to
           determine when this validator is leader in the schedule.

       (b) The key used by the Agave sub-process, if running
           Frankendancer.  The Agave key is inside a Mutex<> so it is
           swapped atomically across all consumers.

     If the PoH tile reports that the switch failed, the remaining
     switch steps are skipped: the state machine records the error and
     jumps straight to ALL_SWITCHED so that the leader pipeline gets
     unhalted again. */
#define FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED    (2UL)

/* State 3: POH_HALTED
     The PoH tile has confirmed that it has halted the leader pipeline,
     and the validator is no longer leader.  No more blocks will be
     produced until it is unhalted.  In addition, the PoH tile has
     switched both its own identity key and the Agave key. */
#define FD_SET_IDENTITY_STATE_POH_HALTED            (3UL)

/* State 4: SHRED_FLUSH_REQUESTED
     Once the leader pipeline is halted, it must be flushed, meaning any
     in-flight shreds that could potentially need to be signed with the
     old key are signed and sent to the network.  This doesn't strictly
     need to happen before other tiles have their key flushed, but it
     makes the control flow easier to understand if we do this as an
     explicit step.

     The shred tile is flushed by telling it the last sequence number
     the PoH tile has produced for an outgoing shred, at the time it was
     halted, and then waiting for the shred tile to confirm that it has
     seen and processed all shreds up to and including that sequence
     number.

     In addition to flushing out any in-flight shreds, this also causes
     the shred tile to switch the identity key it uses internally, for
     determining where this validator is positioned in the Turbine tree. */
#define FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED (4UL)

/* State 5: SHRED_FLUSHED
     The shred tile confirms that it has seen and processed all shreds
     up to and including the last sequence number produced by the PoH
     tile at the time it was halted.  The shred tile has also switched
     its own identity key when it indicates the flush is complete. */
#define FD_SET_IDENTITY_STATE_SHRED_FLUSHED         (5UL)

/* State 6: ALL_SWITCH_REQUESTED
     The client now requests that all other tiles which consume the
     identity key in some way switch to the new key.  The leader
     pipeline is still halted, although it doesn't strictly need to be,
     since outgoing shreds have been flushed.  This is done to keep the
     control flow simpler.

     The other tiles using the identity key are:

       (a) Sign.  The sign tile is responsible for holding the private
           key.  It is the only one of these tiles that is handed the
           full 64 byte keypair; the others only receive the 32 byte
           public key.
       (b) GUI.  The GUI shows the validator identity key to the user,
           and uses the key to determine which blocks are ours for
           highlighting on the frontend.
       (c) Event.  Outgoing events to the event server are signed with
           the identity key to authenticate the sender.
       (d) Bundle.  The validator must authenticate to any connected
           bundle server with the identity key to prove it is on the
           leader schedule. */
#define FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED  (6UL)

/* State 7: ALL_SWITCHED
     All remaining tiles that use the identity key have confirmed that
     they have switched to the new key.  The validator is now fully
     switched over. */
#define FD_SET_IDENTITY_STATE_ALL_SWITCHED          (7UL)

/* State 8: POH_UNHALT_REQUESTED
     The final step.  Now that all tiles have switched, the leader
     pipeline can be unblocked and the validator can resume producing
     blocks.  Once the PoH tile confirms that the leader pipeline is
     unhalted, the next state is UNLOCKED. */
#define FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED  (8UL)
     120             : 
     121             : void
     122             : set_identity_cmd_perm( args_t *         args   FD_PARAM_UNUSED,
     123             :                        fd_cap_chk_t *   chk,
     124           0 :                        config_t const * config FD_PARAM_UNUSED ) {
     125             :   /* 5 huge pages for the key storage area */
     126           0 :   ulong mlock_limit = 5UL * FD_SHMEM_NORMAL_PAGE_SZ;
     127           0 :   fd_cap_chk_raise_rlimit( chk, "set-identity", RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" );
     128           0 : }
     129             : 
     130             : fd_keyswitch_t *
     131             : find_keyswitch( fd_topo_t const * topo,
     132           0 :                 char const *      tile_name ) {
     133           0 :   ulong tile_idx = fd_topo_find_tile( topo, tile_name, 0UL );
     134           0 :   FD_TEST( tile_idx!=ULONG_MAX );
     135           0 :   FD_TEST( topo->tiles[ tile_idx ].keyswitch_obj_id!=ULONG_MAX );
     136             : 
     137           0 :   fd_keyswitch_t * keyswitch = fd_topo_obj_laddr( topo, topo->tiles[ tile_idx ].keyswitch_obj_id );
     138           0 :   FD_TEST( keyswitch );
     139           0 :   return keyswitch;
     140           0 : }
     141             : 
     142             : static void FD_FN_SENSITIVE
     143             : poll_keyswitch( fd_topo_t * topo,
     144             :                 ulong *     state,
     145             :                 ulong *     halted_seq,
     146             :                 uchar *     keypair,
     147             :                 int *       has_error,
     148             :                 int         require_tower,
     149           0 :                 int         force_lock ) {
     150           0 :   switch( *state ) {
     151           0 :     case FD_SET_IDENTITY_STATE_UNLOCKED: {
     152           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     153           0 :       if( FD_LIKELY( FD_KEYSWITCH_STATE_UNLOCKED==FD_ATOMIC_CAS( &poh->state, FD_KEYSWITCH_STATE_UNLOCKED, FD_KEYSWITCH_STATE_LOCKED ) ) ) {
     154           0 :         *state = FD_SET_IDENTITY_STATE_LOCKED;
     155           0 :         FD_LOG_INFO(( "Locking validator identity for key switch..." ));
     156           0 :       } else {
     157           0 :         if( FD_UNLIKELY( force_lock ) ) {
     158           0 :           *state = FD_SET_IDENTITY_STATE_LOCKED;
     159           0 :           FD_LOG_WARNING(( "Another process was changing keys, but `--force` supplied. Forcing lock on validator identity for key switch..." ));
     160           0 :         } else {
     161           0 :           FD_LOG_ERR(( "Cannot set-identity because Firedancer is already in the process of switching keys. If you are not currently "
     162           0 :                        "changing the identity, it might be because an identity change was abandoned. To recover, run the `set-identity` "
     163           0 :                        "command again with the `--force` argument." ));
     164           0 :         }
     165           0 :       }
     166           0 :       break;
     167           0 :     }
     168           0 :     case FD_SET_IDENTITY_STATE_LOCKED: {
     169           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     170           0 :       memcpy( poh->bytes, keypair, 64UL );
     171           0 :       poh->param = !!require_tower;
     172           0 :       FD_COMPILER_MFENCE();
     173           0 :       poh->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     174           0 :       FD_COMPILER_MFENCE();
     175           0 :       *state = FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED;
     176           0 :       FD_LOG_INFO(( "Pausing leader pipeline for key switch..." ));
     177           0 :       break;
     178           0 :     }
     179           0 :     case FD_SET_IDENTITY_STATE_POH_HALT_REQUESTED: {
     180           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     181           0 :       if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     182           0 :         explicit_bzero( poh->bytes, 64UL );
     183           0 :         FD_COMPILER_MFENCE();
     184           0 :         *halted_seq = poh->result;
     185           0 :         *state = FD_SET_IDENTITY_STATE_POH_HALTED;
     186           0 :         FD_LOG_INFO(( "Leader pipeline successfully paused..." ));
     187           0 :       } else if( FD_UNLIKELY( poh->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
     188           0 :         FD_SPIN_PAUSE();
     189           0 :       } else if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_FAILED ) ) {
     190             :         /* Failed to switch identity in Agave, so abort the entire process. */
     191           0 :         *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
     192           0 :         *has_error = 1;
     193           0 :       } else {
     194           0 :         FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh->state ));
     195           0 :       }
     196           0 :       break;
     197           0 :     }
     198           0 :     case FD_SET_IDENTITY_STATE_POH_HALTED: {
     199           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     200           0 :         fd_topo_tile_t const * tile = &topo->tiles[ i ];
     201           0 :         if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
     202             : 
     203           0 :         fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
     204           0 :         FD_TEST( shred );
     205             : 
     206           0 :         shred->param = *halted_seq;
     207           0 :         memcpy( shred->bytes, keypair+32UL, 32UL );
     208           0 :         FD_COMPILER_MFENCE();
     209           0 :         shred->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     210           0 :         FD_COMPILER_MFENCE();
     211           0 :         FD_LOG_INFO(( "Flushing in-flight unpublished shreds, must reach seq %lu...", *halted_seq ));
     212           0 :       }
     213             : 
     214           0 :       *state = FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED;
     215           0 :       break;
     216           0 :     }
     217           0 :     case FD_SET_IDENTITY_STATE_SHRED_FLUSH_REQUESTED: {
     218           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     219           0 :         fd_topo_tile_t const * tile = &topo->tiles[ i ];
     220           0 :         if( FD_LIKELY( strcmp( tile->name, "shred" ) ) ) continue;
     221             : 
     222           0 :         fd_keyswitch_t * shred = fd_topo_obj_laddr( topo, tile->keyswitch_obj_id );
     223           0 :         FD_TEST( shred );
     224             : 
     225           0 :         if( FD_LIKELY( shred->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     226           0 :           continue;
     227           0 :         } else if( FD_UNLIKELY( shred->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
     228             :           /* If any of the shred tiles is still pending, we need to wait. */
     229           0 :           FD_SPIN_PAUSE();
     230           0 :           return;
     231           0 :         } else {
     232           0 :           FD_LOG_ERR(( "Unexpected shred:%lu keyswitch state %lu", tile->kind_id, shred->state ));
     233           0 :         }
     234           0 :       }
     235             : 
     236           0 :       *state = FD_SET_IDENTITY_STATE_SHRED_FLUSHED;
     237           0 :       FD_LOG_INFO(( "All in-flight shreds published..." ));
     238           0 :       break;
     239           0 :     }
     240           0 :     case FD_SET_IDENTITY_STATE_SHRED_FLUSHED: {
     241           0 :       fd_keyswitch_t * sign = find_keyswitch( topo, "sign" );
     242           0 :       memcpy( sign->bytes, keypair, 64UL );
     243           0 :       FD_COMPILER_MFENCE();
     244           0 :       explicit_bzero( keypair, 32UL ); /* Private key no longer needed in this process */
     245           0 :       FD_COMPILER_MFENCE();
     246           0 :       sign->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     247           0 :       FD_COMPILER_MFENCE();
     248             : 
     249           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     250           0 :         if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
     251           0 :         if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ||
     252           0 :                        !strcmp( topo->tiles[ i ].name, "poh" ) ||
     253           0 :                        !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
     254             : 
     255           0 :         fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
     256           0 :         memcpy( tile_ks->bytes, keypair+32UL, 32UL );
     257           0 :         FD_COMPILER_MFENCE();
     258           0 :         tile_ks->state = FD_KEYSWITCH_STATE_SWITCH_PENDING;
     259           0 :         FD_COMPILER_MFENCE();
     260           0 :       }
     261             : 
     262           0 :       FD_LOG_INFO(( "Requesting all tiles switch identity key..." ));
     263           0 :       *state = FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED;
     264           0 :       break;
     265           0 :     }
     266           0 :     case FD_SET_IDENTITY_STATE_ALL_SWITCH_REQUESTED: {
     267           0 :       ulong all_switched = 1UL;
     268           0 :       for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     269           0 :         if( FD_LIKELY( topo->tiles[ i ].keyswitch_obj_id==ULONG_MAX ) ) continue;
     270           0 :         if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "poh" ) ||
     271           0 :                        !strcmp( topo->tiles[ i ].name, "shred" ) ) ) continue;
     272             : 
     273           0 :         fd_keyswitch_t * tile_ks = fd_topo_obj_laddr( topo, topo->tiles[ i ].keyswitch_obj_id );
     274           0 :         if( FD_LIKELY( tile_ks->state==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
     275           0 :           all_switched = 0UL;
     276           0 :           break;
     277           0 :         } else if( FD_UNLIKELY( tile_ks->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     278           0 :           if( FD_LIKELY( !strcmp( topo->tiles[ i ].name, "sign" ) ) ) {
     279           0 :             FD_COMPILER_MFENCE();
     280           0 :             explicit_bzero( tile_ks->bytes, 64UL );
     281           0 :             FD_COMPILER_MFENCE();
     282           0 :           }
     283           0 :           continue;
     284           0 :         } else {
     285           0 :           FD_LOG_ERR(( "Unexpected %s keyswitch state %lu", topo->tiles[ i ].name, tile_ks->state ));
     286           0 :         }
     287           0 :       }
     288             : 
     289           0 :       if( FD_LIKELY( all_switched ) ) {
     290           0 :         FD_LOG_INFO(( "All tiles successfully switched identity key..." ));
     291           0 :         *state = FD_SET_IDENTITY_STATE_ALL_SWITCHED;
     292           0 :       } else {
     293           0 :         FD_SPIN_PAUSE();
     294           0 :       }
     295           0 :       break;
     296           0 :     }
     297           0 :     case FD_SET_IDENTITY_STATE_ALL_SWITCHED: {
     298           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     299           0 :       poh->state = FD_KEYSWITCH_STATE_UNHALT_PENDING;
     300           0 :       FD_LOG_INFO(( "Requesting to unpause leader pipeline..." ));
     301           0 :       *state = FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED;
     302           0 :       break;
     303           0 :     }
     304           0 :     case FD_SET_IDENTITY_STATE_POH_UNHALT_REQUESTED: {
     305           0 :       fd_keyswitch_t * poh = find_keyswitch( topo, "poh" );
     306           0 :       if( FD_LIKELY( poh->state==FD_KEYSWITCH_STATE_COMPLETED ) ) {
     307           0 :         FD_LOG_INFO(( "Leader pipeline unpaused..." ));
     308           0 :         poh->state = FD_KEYSWITCH_STATE_UNLOCKED;
     309           0 :         *state = FD_SET_IDENTITY_STATE_UNLOCKED;
     310           0 :       } else if( FD_UNLIKELY( poh->state==FD_KEYSWITCH_STATE_UNHALT_PENDING ) ) {
     311           0 :         FD_SPIN_PAUSE();
     312           0 :       } else {
     313           0 :         FD_LOG_ERR(( "Unexpected poh keyswitch state %lu", poh->state ));
     314           0 :       }
     315           0 :       break;
     316           0 :     }
     317           0 :   }
     318           0 : }
     319             : 
     320             : void
     321             : set_identity_cmd_args( int *    pargc,
     322             :                        char *** pargv,
     323           0 :                        args_t * args) {
     324           0 :   args->set_identity.require_tower = fd_env_strip_cmdline_contains( pargc, pargv, "--require-tower" );
     325           0 :   args->set_identity.force         = fd_env_strip_cmdline_contains( pargc, pargv, "--force" );
     326             : 
     327           0 :   if( FD_UNLIKELY( *pargc<1 ) ) goto err;
     328             : 
     329           0 :   char const * path = *pargv[0];
     330           0 :   (*pargc)--;
     331           0 :   (*pargv)++;
     332             : 
     333           0 :   if( FD_UNLIKELY( !strcmp( path, "-" ) ) ) {
     334           0 :     args->set_identity.keypair = fd_keyload_alloc_protected_pages( 1UL, 2UL );
     335           0 :     FD_LOG_STDOUT(( "Reading identity keypair from stdin.  Press Ctrl-D when done.\n" ));
     336           0 :     fd_keyload_read( STDIN_FILENO, "stdin", args->set_identity.keypair );
     337           0 :   } else {
     338           0 :     args->set_identity.keypair = fd_keyload_load( path, 0 );
     339           0 :   }
     340             : 
     341           0 :   return;
     342             : 
     343           0 : err:
     344           0 :   FD_LOG_ERR(( "Usage: fdctl set-identity <keypair> [--require-tower]" ));
     345           0 : }
     346             : 
     347             : static void FD_FN_SENSITIVE
     348             : set_identity( args_t *   args,
     349           0 :               config_t * config ) {
     350           0 :   uchar check_public_key[ 32 ];
     351           0 :   fd_sha512_t sha512[1];
     352           0 :   FD_TEST( fd_sha512_join( fd_sha512_new( sha512 ) ) );
     353           0 :   fd_ed25519_public_from_private( check_public_key, args->set_identity.keypair, sha512 );
     354           0 :   if( FD_UNLIKELY( memcmp( check_public_key, args->set_identity.keypair+32UL, 32UL ) ) )
     355           0 :     FD_LOG_ERR(( "The public key in the identity key file does not match the public key derived from the private key. "
     356           0 :                  "Firedancer will not use the key pair to sign as it might leak the private key." ));
     357             : 
     358           0 :   for( ulong i=0UL; i<config->topo.obj_cnt; i++ ) {
     359           0 :     fd_topo_obj_t * obj = &config->topo.objs[ i ];
     360           0 :     if( FD_LIKELY( strcmp( obj->name, "keyswitch" ) ) ) continue;
     361             : 
     362           0 :     fd_topo_join_workspace( &config->topo, &config->topo.workspaces[ obj->wksp_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
     363           0 :   }
     364             : 
     365           0 :   int has_error = 0;
     366           0 :   ulong state = FD_SET_IDENTITY_STATE_UNLOCKED;
     367           0 :   ulong halted_seq = 0UL;
     368           0 :   for(;;) {
     369           0 :     poll_keyswitch( &config->topo, &state, &halted_seq, args->set_identity.keypair, &has_error, args->set_identity.require_tower, args->set_identity.force );
     370           0 :     if( FD_UNLIKELY( FD_SET_IDENTITY_STATE_UNLOCKED==state ) ) break;
     371           0 :   }
     372             : 
     373           0 :   char identity_key_base58[ FD_BASE58_ENCODED_32_SZ ];
     374           0 :   fd_base58_encode_32( args->set_identity.keypair+32UL, NULL, identity_key_base58 );
     375           0 :   identity_key_base58[ FD_BASE58_ENCODED_32_SZ-1UL ] = '\0';
     376             : 
     377           0 :   if( FD_UNLIKELY( has_error ) ) FD_LOG_ERR(( "Failed to switch identity key to `%s`, check validator logs for details", identity_key_base58 ));
     378           0 :   else                           FD_LOG_NOTICE(( "Validator identity key switched to `%s`", identity_key_base58 ));
     379           0 : }
     380             : 
/* set_identity_cmd_fn forwards args and config unchanged to
   set_identity, which does all of the work.
   NOTE(review): presumably this is the function registered as the
   run handler of the set-identity command -- confirm at the
   registration site. */

void
set_identity_cmd_fn( args_t *   args,
                     config_t * config ) {
  set_identity( args, config );
}

Generated by: LCOV version 1.14