LCOV - code coverage report
Current view: top level - disco/topo - fd_topo_run.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 232 0.0 %
Date: 2025-07-15 04:56:17 Functions: 0 9 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "fd_topo.h"
       3             : 
       4             : #include "../metrics/fd_metrics.h"
       5             : #include "../../waltz/xdp/fd_xdp1.h"
       6             : #include "../../util/tile/fd_tile_private.h"
       7             : 
       8             : #include <unistd.h>
       9             : #include <signal.h>
      10             : #include <errno.h>
      11             : #include <pthread.h>
      12             : #include <sys/syscall.h>
      13             : #include <linux/futex.h>
      14             : #include <sys/resource.h>
      15             : #include <sys/prctl.h>
      16             : #include <sys/stat.h>
      17             : #include <sys/mman.h>
      18             : #include <net/if.h>
      19             : 
      20             : static void
      21             : initialize_logging( char const * tile_name,
      22             :                     ulong        tile_kind_id,
      23             :                     ulong        pid,
      24           0 :                     ulong        tid ) {
      25           0 :   fd_log_cpu_set( NULL );
      26           0 :   fd_log_private_tid_set( pid );
      27           0 :   char thread_name[ 20 ];
      28           0 :   FD_TEST( fd_cstr_printf_check( thread_name, sizeof( thread_name ), NULL, "%s:%lu", tile_name, tile_kind_id ) );
      29           0 :   fd_log_thread_set( thread_name );
      30           0 :   fd_log_private_stack_discover( FD_TILE_PRIVATE_STACK_SZ,
      31           0 :                                  &fd_tile_private_stack0, &fd_tile_private_stack1 );
      32           0 :   FD_LOG_NOTICE(( "booting tile %s pid:%lu tid:%lu", thread_name, fd_log_group_id(), tid ));
      33           0 : }
      34             : 
      35             : static void
      36             : check_wait_debugger( ulong          pid,
      37             :                      volatile int * wait,
      38           0 :                      volatile int * debugger ) {
      39           0 :   if( FD_UNLIKELY( debugger ) ) {
      40           0 :     FD_LOG_WARNING(( "waiting for debugger to attach to tile pid:%lu", pid ));
      41           0 :     if( FD_UNLIKELY( -1==kill( getpid(), SIGSTOP ) ) )
      42           0 :       FD_LOG_ERR(( "kill(SIGSTOP) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      43           0 :     *FD_VOLATILE( debugger ) = 1;
      44           0 :   }
      45             : 
      46           0 :   if( FD_UNLIKELY( wait ) ) {
      47           0 :     while( FD_LIKELY( !*FD_VOLATILE( wait ) ) ) FD_SPIN_PAUSE();
      48           0 :   }
      49           0 : }
      50             : 
      51             : void
      52             : fd_topo_run_tile( fd_topo_t *          topo,
      53             :                   fd_topo_tile_t *     tile,
      54             :                   int                  sandbox,
      55             :                   int                  keep_controlling_terminal,
      56             :                   int                  dumpable,
      57             :                   uint                 uid,
      58             :                   uint                 gid,
      59             :                   int                  allow_fd,
      60             :                   volatile int *       wait,
      61             :                   volatile int *       debugger,
      62           0 :                   fd_topo_run_tile_t * tile_run ) {
      63           0 :   char thread_name[ 20 ];
      64           0 :   FD_TEST( fd_cstr_printf_check( thread_name, sizeof( thread_name ), NULL, "%s:%lu", tile->name, tile->kind_id ) );
      65           0 :   if( FD_UNLIKELY( prctl( PR_SET_NAME, thread_name, 0, 0, 0 ) ) ) FD_LOG_ERR(( "prctl(PR_SET_NAME) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      66             : 
      67           0 :   ulong pid = fd_sandbox_getpid(); /* Need to read /proc again.. we got a new PID from clone */
      68           0 :   ulong tid = fd_sandbox_gettid(); /* Need to read /proc again.. we got a new TID from clone */
      69             : 
      70           0 :   check_wait_debugger( pid, wait, debugger );
      71           0 :   initialize_logging( tile->name, tile->kind_id, pid, tid );
      72             : 
      73             :   /* preload shared memory before sandboxing, so it is already mapped */
      74           0 :   fd_topo_join_tile_workspaces( topo, tile );
      75             : 
      76           0 :   if( FD_UNLIKELY( tile_run->privileged_init ) )
      77           0 :     tile_run->privileged_init( topo, tile );
      78             : 
      79           0 :   ulong allow_fds_offset = 0UL;
      80           0 :   int allow_fds[ 256 ] = { 0 };
      81           0 :   if( FD_LIKELY( -1!=allow_fd ) ) {
      82           0 :     allow_fds_offset = 1UL;
      83           0 :     allow_fds[ 0 ] = allow_fd;
      84           0 :   }
      85           0 :   ulong allow_fds_cnt = 0UL;
      86           0 :   if( FD_LIKELY( tile_run->populate_allowed_fds ) ) {
      87           0 :     allow_fds_cnt = tile_run->populate_allowed_fds( topo,
      88           0 :                                                     tile,
      89           0 :                                                     (sizeof(allow_fds)/sizeof(allow_fds[ 0 ]))-allow_fds_offset,
      90           0 :                                                     allow_fds+allow_fds_offset );
      91           0 :   }
      92             : 
      93             : 
      94           0 :   struct sock_filter seccomp_filter[ 128UL ];
      95           0 :   ulong seccomp_filter_cnt = 0UL;
      96           0 :   if( FD_LIKELY( tile_run->populate_allowed_seccomp ) ) {
      97           0 :     seccomp_filter_cnt = tile_run->populate_allowed_seccomp( topo,
      98           0 :                                                              tile,
      99           0 :                                                              sizeof(seccomp_filter)/sizeof(seccomp_filter[ 0 ]),
     100           0 :                                                              seccomp_filter );
     101           0 :   }
     102             : 
     103           0 :   ulong rlimit_file_cnt = tile_run->rlimit_file_cnt;
     104           0 :   if( tile_run->rlimit_file_cnt_fn ) {
     105           0 :     rlimit_file_cnt = tile_run->rlimit_file_cnt_fn( topo, tile );
     106           0 :   }
     107             : 
     108           0 :   if( FD_LIKELY( sandbox ) ) {
     109           0 :     fd_sandbox_enter( uid,
     110           0 :                       gid,
     111           0 :                       tile_run->keep_host_networking,
     112           0 :                       tile_run->allow_connect,
     113           0 :                       keep_controlling_terminal,
     114           0 :                       dumpable,
     115           0 :                       rlimit_file_cnt,
     116           0 :                       tile_run->rlimit_address_space,
     117           0 :                       tile_run->rlimit_data,
     118           0 :                       allow_fds_cnt+allow_fds_offset,
     119           0 :                       allow_fds,
     120           0 :                       seccomp_filter_cnt,
     121           0 :                       seccomp_filter );
     122           0 :   } else {
     123           0 :     fd_sandbox_switch_uid_gid( uid, gid );
     124           0 :   }
     125             : 
     126             :   /* Now we are sandboxed, join all the tango IPC objects in the workspaces */
     127           0 :   fd_topo_fill_tile( topo, tile );
     128             : 
     129           0 :   FD_TEST( tile->metrics );
     130           0 :   fd_metrics_register( tile->metrics );
     131             : 
     132           0 :   FD_MGAUGE_SET( TILE, PID, pid );
     133           0 :   FD_MGAUGE_SET( TILE, TID, tid );
     134             : 
     135           0 :   if( FD_UNLIKELY( tile_run->unprivileged_init ) )
     136           0 :     tile_run->unprivileged_init( topo, tile );
     137             : 
     138           0 :   tile_run->run( topo, tile );
     139           0 :   if( FD_UNLIKELY( !tile->allow_shutdown ) ) FD_LOG_ERR(( "tile %s:%lu run loop returned", tile->name, tile->kind_id ));
     140             : 
     141           0 :   FD_MGAUGE_SET( TILE, STATUS, 2UL );
     142           0 : }
     143             : 
     144             : typedef struct {
     145             :   fd_topo_t *        topo;
     146             :   fd_topo_tile_t *   tile;
     147             :   fd_topo_run_tile_t tile_run;
     148             :   uint               uid;
     149             :   uint               gid;
     150             :   volatile int       copied;
     151             :   void *             stack_lo;
     152             :   void *             stack_hi;
     153             : } fd_topo_run_thread_args_t;
     154             : 
     155             : static void *
     156           0 : run_tile_thread_main( void * _args ) {
     157           0 :   fd_topo_run_thread_args_t args = *(fd_topo_run_thread_args_t *)_args;
     158           0 :   FD_COMPILER_MFENCE();
     159           0 :   ((fd_topo_run_thread_args_t *)_args)->copied = 1;
     160             : 
     161             :   /* Prevent fork() from smashing the stack */
     162           0 :   if( FD_UNLIKELY( madvise( args.stack_lo, (ulong)args.stack_hi - (ulong)args.stack_lo, MADV_DONTFORK ) ) ) {
     163           0 :     FD_LOG_ERR(( "madvise(stack,MADV_DONTFORK) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     164           0 :   }
     165             : 
     166           0 :   fd_topo_run_tile( args.topo, args.tile, 0, 1, 1, args.uid, args.gid, -1, NULL, NULL, &args.tile_run );
     167           0 :   FD_TEST( args.tile->allow_shutdown );
     168           0 :   return NULL;
     169           0 : }
     170             : 
     171             : /* fd_topo_tile_stack_join_anon is a variant of fd_topo_tile_stack_join
     172             :    that acquires private anonymous memory instead of shared pages.
     173             : 
     174             :    This is required for fork() to work, as the parent and child process
     175             :    would otherwise share a stack and corrupt each other.  While fork()
     176             :    is banned in tile user code, some dynamic analysis tools (like MSan)
     177             :    unfortunately rely on it. */
     178             : 
     179             : FD_FN_UNUSED static void *
     180           0 : fd_topo_tile_stack_join_anon( void ) {
     181           0 : 
     182           0 :   ulong sz    = 2*FD_TILE_PRIVATE_STACK_SZ;
     183           0 :   int   prot  = PROT_READ|PROT_WRITE;
     184           0 :   int   flags = MAP_PRIVATE|MAP_ANONYMOUS|MAP_STACK;
     185           0 : 
     186           0 :   uchar * stack = MAP_FAILED;
     187           0 : #if !FD_HAS_ASAN && !FD_HAS_MSAN
     188           0 :   stack = mmap( NULL, sz, prot, flags|MAP_HUGETLB, -1, 0 );
     189           0 : #endif
     190           0 : 
     191           0 :   if( stack==MAP_FAILED ) {
     192           0 :     stack = mmap( NULL, sz, prot, flags, -1, 0 );
     193           0 :     if( FD_UNLIKELY( stack==MAP_FAILED ) ) {
     194           0 :       FD_LOG_ERR(( "mmap() for stack failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     195           0 :     }
     196           0 :   }
     197           0 : 
     198           0 :   /* Create the guard regions in the extra space */
     199           0 :   void * guard_lo = (void *)( stack - FD_SHMEM_NORMAL_PAGE_SZ );
     200           0 :   if( FD_UNLIKELY( mmap( guard_lo, FD_SHMEM_NORMAL_PAGE_SZ, PROT_NONE,
     201           0 :                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, (off_t)0 )!=guard_lo ) )
     202           0 :     FD_LOG_ERR(( "mmap(%p) failed (%i-%s)", guard_lo, errno, fd_io_strerror( errno ) ));
     203           0 : 
     204           0 :   void * guard_hi = (void *)( stack + FD_TILE_PRIVATE_STACK_SZ );
     205           0 :   if( FD_UNLIKELY( mmap( guard_hi, FD_SHMEM_NORMAL_PAGE_SZ, PROT_NONE,
     206           0 :                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, (off_t)0 )!=guard_hi ) )
     207           0 :     FD_LOG_ERR(( "mmap(%p) failed (%i-%s)", guard_hi, errno, fd_io_strerror( errno ) ));
     208           0 : 
     209           0 :   return stack;
     210           0 : }
     211             : 
     212             : void *
     213             : fd_topo_tile_stack_join( char const * app_name,
     214             :                          char const * tile_name,
     215           0 :                          ulong        tile_kind_id ) {
     216             : #if FD_HAS_MSAN
     217             :   return fd_topo_tile_stack_join_anon();
     218             : #endif
     219             : 
     220           0 :   char name[ PATH_MAX ];
     221           0 :   FD_TEST( fd_cstr_printf_check( name, PATH_MAX, NULL, "%s_stack_%s%lu", app_name, tile_name, tile_kind_id ) );
     222             : 
     223           0 :   uchar * stack = fd_shmem_join( name, FD_SHMEM_JOIN_MODE_READ_WRITE, NULL, NULL, NULL, 1 );
     224           0 :   if( FD_UNLIKELY( !stack ) ) FD_LOG_ERR(( "fd_shmem_join failed" ));
     225             : 
     226             :   /* Make space for guard lo and guard hi */
     227           0 :   if( FD_UNLIKELY( fd_shmem_release( stack, FD_SHMEM_HUGE_PAGE_SZ, 1UL ) ) )
     228           0 :     FD_LOG_ERR(( "fd_shmem_release (%d-%s)", errno, fd_io_strerror( errno ) ));
     229           0 :   stack += FD_SHMEM_HUGE_PAGE_SZ;
     230           0 :   if( FD_UNLIKELY( fd_shmem_release( stack + FD_TILE_PRIVATE_STACK_SZ, FD_SHMEM_HUGE_PAGE_SZ, 1UL ) ) )
     231           0 :     FD_LOG_ERR(( "fd_shmem_release (%d-%s)", errno, fd_io_strerror( errno ) ));
     232             : 
     233             :   /* Create the guard regions in the extra space */
     234           0 :   void * guard_lo = (void *)(stack - FD_SHMEM_NORMAL_PAGE_SZ );
     235           0 :   if( FD_UNLIKELY( mmap( guard_lo, FD_SHMEM_NORMAL_PAGE_SZ, PROT_NONE,
     236           0 :                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, (off_t)0 )!=guard_lo ) )
     237           0 :     FD_LOG_ERR(( "mmap failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     238             : 
     239           0 :   void * guard_hi = (void *)(stack + FD_TILE_PRIVATE_STACK_SZ);
     240           0 :   if( FD_UNLIKELY( mmap( guard_hi, FD_SHMEM_NORMAL_PAGE_SZ, PROT_NONE,
     241           0 :                          MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, (off_t)0 )!=guard_hi ) )
     242           0 :     FD_LOG_ERR(( "mmap failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     243             : 
     244           0 :   return stack;
     245           0 : }
     246             : 
     247             : fd_xdp_fds_t
     248             : fd_topo_install_xdp( fd_topo_t const * topo,
     249           0 :                      uint              bind_addr ) {
     250           0 :   ulong net0_tile_idx = fd_topo_find_tile( topo, "net", 0UL );
     251           0 :   FD_TEST( net0_tile_idx!=ULONG_MAX );
     252           0 :   fd_topo_tile_t const * net0_tile = &topo->tiles[ net0_tile_idx ];
     253             : 
     254           0 :   ushort udp_port_candidates[] = {
     255           0 :     (ushort)net0_tile->xdp.net.legacy_transaction_listen_port,
     256           0 :     (ushort)net0_tile->xdp.net.quic_transaction_listen_port,
     257           0 :     (ushort)net0_tile->xdp.net.shred_listen_port,
     258           0 :     (ushort)net0_tile->xdp.net.gossip_listen_port,
     259           0 :     (ushort)net0_tile->xdp.net.repair_intake_listen_port,
     260           0 :     (ushort)net0_tile->xdp.net.repair_serve_listen_port,
     261           0 :     (ushort)net0_tile->xdp.net.send_src_port,
     262           0 :   };
     263             : 
     264           0 :   uint if_idx = if_nametoindex( net0_tile->xdp.interface );
     265           0 :   if( FD_UNLIKELY( !if_idx ) ) FD_LOG_ERR(( "if_nametoindex(%s) failed", net0_tile->xdp.interface ));
     266             : 
     267           0 :   fd_xdp_fds_t xdp_fds = fd_xdp_install( if_idx,
     268           0 :                                          bind_addr,
     269           0 :                                          sizeof(udp_port_candidates)/sizeof(udp_port_candidates[0]),
     270           0 :                                          udp_port_candidates,
     271           0 :                                          net0_tile->xdp.xdp_mode );
     272           0 :   if( FD_UNLIKELY( -1==dup2( xdp_fds.xsk_map_fd, 123462 ) ) ) FD_LOG_ERR(( "dup2() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     273           0 :   if( FD_UNLIKELY( -1==close( xdp_fds.xsk_map_fd ) ) ) FD_LOG_ERR(( "close() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     274           0 :   if( FD_UNLIKELY( -1==dup2( xdp_fds.prog_link_fd, 123463 ) ) ) FD_LOG_ERR(( "dup2() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     275           0 :   if( FD_UNLIKELY( -1==close( xdp_fds.prog_link_fd ) ) ) FD_LOG_ERR(( "close() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     276             : 
     277           0 :   xdp_fds.xsk_map_fd = 123462;
     278           0 :   xdp_fds.prog_link_fd = 123463;
     279             : 
     280           0 :   return xdp_fds;
     281           0 : }
     282             : 
     283             : static inline void
     284             : run_tile_thread( fd_topo_t *         topo,
     285             :                  fd_topo_tile_t *    tile,
     286             :                  fd_topo_run_tile_t  tile_run,
     287             :                  uint                uid,
     288             :                  uint                gid,
     289             :                  fd_cpuset_t const * floating_cpu_set,
     290           0 :                  int                 floating_priority ) {
     291             :   /* tpool will assign a thread later */
     292           0 :   if( FD_UNLIKELY( tile_run.for_tpool ) ) return;
     293           0 :   void * stack = fd_topo_tile_stack_join( topo->app_name, tile->name, tile->kind_id );
     294             : 
     295           0 :   pthread_attr_t attr[ 1 ];
     296           0 :   if( FD_UNLIKELY( pthread_attr_init( attr ) ) ) FD_LOG_ERR(( "pthread_attr_init() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     297           0 :   if( FD_UNLIKELY( pthread_attr_setstack( attr, stack, FD_TILE_PRIVATE_STACK_SZ ) ) ) FD_LOG_ERR(( "pthread_attr_setstacksize() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     298             : 
     299           0 :   FD_CPUSET_DECL( cpu_set );
     300           0 :   if( FD_LIKELY( tile->cpu_idx<65535UL ) ) {
     301             :     /* set the thread affinity before we clone the new process to ensure
     302             :        kernel first touch happens on the desired thread. */
     303           0 :     fd_cpuset_insert( cpu_set, tile->cpu_idx );
     304           0 :     if( FD_UNLIKELY( -1==setpriority( PRIO_PROCESS, 0, -19 ) ) ) FD_LOG_ERR(( "setpriority() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     305           0 :   } else {
     306           0 :     fd_memcpy( cpu_set, floating_cpu_set, fd_cpuset_footprint() );
     307           0 :     if( FD_UNLIKELY( -1==setpriority( PRIO_PROCESS, 0, floating_priority ) ) ) FD_LOG_ERR(( "setpriority() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     308           0 :   }
     309             : 
     310           0 :   if( FD_UNLIKELY( fd_cpuset_setaffinity( 0, cpu_set ) ) ) {
     311           0 :     if( FD_LIKELY( errno==EINVAL ) ) {
     312           0 :       FD_LOG_ERR(( "Unable to set the thread affinity for tile %s:%lu on cpu %lu. It is likely that the affinity "
     313           0 :                    "you have specified for this tile in [layout.affinity] of your configuration file contains a "
     314           0 :                    "CPU (%lu) which does not exist on this machine.",
     315           0 :                    tile->name, tile->kind_id, tile->cpu_idx, tile->cpu_idx ));
     316           0 :     } else {
     317           0 :       FD_LOG_ERR(( "sched_setaffinity failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     318           0 :     }
     319           0 :   }
     320             : 
     321           0 :   fd_topo_run_thread_args_t args = {
     322           0 :     .topo       = topo,
     323           0 :     .tile       = tile,
     324           0 :     .tile_run   = tile_run,
     325           0 :     .uid        = uid,
     326           0 :     .gid        = gid,
     327           0 :     .copied     = 0,
     328           0 :     .stack_lo   = stack,
     329           0 :     .stack_hi   = (uchar *)stack + FD_TILE_PRIVATE_STACK_SZ
     330           0 :   };
     331             : 
     332           0 :   pthread_t pthread;
     333           0 :   if( FD_UNLIKELY( pthread_create( &pthread, attr, run_tile_thread_main, &args ) ) ) FD_LOG_ERR(( "pthread_create() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     334             : 
     335           0 :   while( !FD_VOLATILE( args.copied ) ) FD_SPIN_PAUSE();
     336           0 : }
     337             : 
     338             : void
     339             : fd_topo_run_single_process( fd_topo_t *       topo,
     340             :                             int               agave,
     341             :                             uint              uid,
     342             :                             uint              gid,
     343           0 :                             fd_topo_run_tile_t (* tile_run )( fd_topo_tile_t const * tile ) ) {
     344             :   /* Save the current affinity, it will be restored after creating any child tiles */
     345           0 :   FD_CPUSET_DECL( floating_cpu_set );
     346           0 :   if( FD_UNLIKELY( fd_cpuset_getaffinity( 0, floating_cpu_set ) ) )
     347           0 :     FD_LOG_ERR(( "sched_getaffinity failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     348             : 
     349           0 :   errno = 0;
     350           0 :   int save_priority = getpriority( PRIO_PROCESS, 0 );
     351           0 :   if( FD_UNLIKELY( -1==save_priority && errno ) ) FD_LOG_ERR(( "getpriority() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     352             : 
     353           0 :   for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
     354           0 :     fd_topo_tile_t * tile = &topo->tiles[ i ];
     355           0 :     if( !agave && tile->is_agave ) continue;
     356           0 :     if( agave==1 && !tile->is_agave ) continue;
     357             : 
     358           0 :     fd_topo_run_tile_t run_tile = tile_run( tile );
     359           0 :     run_tile_thread( topo, tile, run_tile, uid, gid, floating_cpu_set, save_priority );
     360           0 :   }
     361             : 
     362           0 :   fd_sandbox_switch_uid_gid( uid, gid );
     363             : 
     364           0 :   if( FD_UNLIKELY( -1==setpriority( PRIO_PROCESS, 0, save_priority ) ) ) FD_LOG_ERR(( "setpriority() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     365           0 :   if( FD_UNLIKELY( fd_cpuset_setaffinity( 0, floating_cpu_set ) ) )
     366           0 :     FD_LOG_ERR(( "sched_setaffinity failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     367           0 : }

Generated by: LCOV version 1.14