LCOV - code coverage report
Current view: top level - util/sandbox - fd_sandbox.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 405 0.0 %
Date: 2025-01-08 12:08:44 Functions: 0 18 0.0 %

          Line data    Source code
       1             : #define _GNU_SOURCE
       2             : #include "fd_sandbox_private.h"
       3             : 
       4             : #include "../cstr/fd_cstr.h"
       5             : #include "../log/fd_log.h"
       6             : 
       7             : #include <fcntl.h>
       8             : #include <stdlib.h>
       9             : #include <errno.h>
      10             : #include <unistd.h>
      11             : #include <sched.h>
      12             : #include <dirent.h>
      13             : #include <sys/stat.h>
      14             : #include <sys/wait.h>
      15             : #include <sys/prctl.h>
      16             : #include <sys/mount.h>
      17             : #include <sys/random.h>
      18             : #include <sys/syscall.h>
      19             : #include <sys/resource.h>
      20             : #include <linux/keyctl.h>
      21             : #include <linux/seccomp.h>
      22             : #include <linux/securebits.h>
      23             : #include <linux/capability.h>
      24             : 
      25             : #if !defined(__linux__)
      26             : #error "Target operating system is unsupported by seccomp."
      27             : #endif
      28             : 
      29             : #if !defined(__x86_64__) && !defined(__aarch64__)
      30             : #error "Target architecture is unsupported by seccomp."
      31             : #else
      32             : 
      33             : #ifndef SYS_landlock_create_ruleset
      34           0 : #define SYS_landlock_create_ruleset 444
      35             : #endif
      36             : 
      37             : #ifndef SYS_landlock_restrict_self
      38           0 : #define SYS_landlock_restrict_self 446
      39             : #endif
      40             : 
      41             : #endif
      42             : 
      43             : void
      44             : fd_sandbox_private_switch_uid_gid( uint desired_uid,
      45             :                                    uint desired_gid );
      46             : 
      47             : static int
      48           0 : check_unshare_eacces_main( void * _arg ) {
      49           0 :   ulong arg = (ulong)_arg;
      50           0 :   uint desired_uid = (uint)((arg >>  0UL) & 0xFFFFUL);
      51           0 :   uint desired_gid = (uint)((arg >> 32UL) & 0xFFFFUL);
      52             : 
      53           0 :   fd_sandbox_private_switch_uid_gid( desired_uid, desired_gid );
      54           0 :   int result = unshare( CLONE_NEWUSER );
      55           0 :   if( -1==result && errno==EACCES ) return 255;
      56           0 :   else if( -1==result ) FD_LOG_ERR(( "unshare(CLONE_NEWUSER) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      57           0 :   result = open( "/proc/self/setgroups", O_WRONLY );
      58           0 :   if( -1==result && errno==EACCES ) return 255;
      59           0 :   if( -1==result ) FD_LOG_ERR(( "open(/proc/self/setgroups) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      60           0 :   return 0;
      61           0 : }
      62             : 
      63             : int
      64             : fd_sandbox_requires_cap_sys_admin( uint desired_uid,
      65           0 :                                    uint desired_gid ) {
      66             : 
      67             :   /* Check for the `unprivileged_userns_clone` sysctl which restricts
      68             :      unprivileged user namespaces on Debian. */
      69             : 
      70           0 :   int fd = open( "/proc/sys/kernel/unprivileged_userns_clone", O_RDONLY );
      71           0 :   if( -1==fd && errno!=ENOENT ) FD_LOG_ERR(( "open(/proc/sys/kernel/unprivileged_userns_clone) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      72           0 :   else if( -1!=fd ) {
      73           0 :     char buf[ 16 ] = {0};
      74           0 :     long count = read( fd, buf, sizeof( buf ) );
      75           0 :     if( -1L==count )                         FD_LOG_ERR(( "read(/proc/sys/kernel/unprivileged_userns_clone) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      76           0 :     if( (ulong)count>=sizeof( buf ) )        FD_LOG_ERR(( "read(/proc/sys/kernel/unprivileged_userns_clone) returned truncated data" ));
      77           0 :     if( 0L!=read( fd, buf, sizeof( buf ) ) ) FD_LOG_ERR(( "read(/proc/sys/kernel/unprivileged_userns_clone) did not return all the data" ));
      78             : 
      79           0 :     char * end;
      80           0 :     ulong unprivileged_userns_clone = strtoul( buf, &end, 10 );
      81           0 :     if( *end!='\n' ) FD_LOG_ERR(( "read(/proc/sys/kernel/unprivileged_userns_clone) returned malformed data" ));
      82           0 :     if( close( fd ) ) FD_LOG_ERR(( "close(/proc/sys/kernel/unprivileged_userns_clone) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
      83             : 
      84           0 :     if( unprivileged_userns_clone!=0 && unprivileged_userns_clone!=1 ) FD_LOG_ERR(( "unprivileged_userns_clone has unexpected value %lu", unprivileged_userns_clone ));
      85             : 
      86           0 :     if( !unprivileged_userns_clone ) return 1;
      87           0 :   }
      88             : 
      89             :   /* Check for EACCES when actually trying to create a user namespace,
      90             :      which indicates an Ubuntu, AppArmor, or SELinux restriction.  We do
      91             :      this in a forked process so it doesn't unintentionally sandbox the
      92             :      caller.  Actually we can't fork here, because the stack might be
      93             :      MAP_SHARED, so do it in a clone with a new stack instead.
      94             : 
      95             :      From Ubuntu 23.10 til 24.04, user namespace creation is disallowed
      96             :      by default and trying to create one as an unprivileged user will
      97             :      return EACCES.
      98             : 
      99             :      From Ubuntu 24.04 onwards, user namespace creation is allowed, but
     100             :      trying to write to /proc/self/setgroups or set the UID/GID maps
     101             :      within the namespace will return EACCES. */
     102             : 
     103           0 :   do {
     104           0 :     uchar child_stack[ 2097152 ]; /* 2 MiB */
     105           0 :     ulong arg = ((ulong)desired_uid << 0UL) | (((ulong)desired_gid) << 32UL);
     106           0 :     int child_pid = clone( check_unshare_eacces_main, child_stack+sizeof(child_stack), 0, (void*)arg );
     107           0 :     if( -1==child_pid ) FD_LOG_ERR(( "clone() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     108             : 
     109           0 :     int wstatus;
     110           0 :     if( -1==waitpid( child_pid, &wstatus, __WALL ) )            FD_LOG_ERR(( "waitpid() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     111           0 :     if( WIFSIGNALED( wstatus ) )                                FD_LOG_ERR(( "user namespace privilege checking process terminated by signal %i-%s", WTERMSIG( wstatus ), fd_io_strsignal( WTERMSIG( wstatus ) ) ));
     112           0 :     if( WEXITSTATUS( wstatus ) && WEXITSTATUS( wstatus )!=255 ) FD_LOG_ERR(( "user namespace privilege checking process exited with status %i", WEXITSTATUS( wstatus ) ));
     113             : 
     114           0 :     if( WEXITSTATUS( wstatus ) ) return 1;
     115           0 :   } while(0);
     116             : 
     117           0 :   return 0;
     118           0 : }
     119             : 
     120             : extern char ** environ;
     121             : 
     122             : void FD_FN_SENSITIVE
     123           0 : fd_sandbox_private_explicit_clear_environment_variables( void ) {
     124           0 :   if( !environ ) return;
     125             : 
     126           0 :   for( char * const * env = environ; *env; env++ ) {
     127           0 :     ulong len = strlen( *env );
     128           0 :     explicit_bzero( *env, len );
     129           0 :   }
     130             : 
     131           0 :   if( clearenv() ) FD_LOG_ERR(( "clearenv failed" ));
     132           0 : }
     133             : 
     134             : void
     135             : fd_sandbox_private_check_exact_file_descriptors( ulong       allowed_file_descriptor_cnt,
     136           0 :                                                  int const * allowed_file_descriptor ) {
     137           0 :   if( allowed_file_descriptor_cnt>256UL ) FD_LOG_ERR(( "allowed_file_descriptors_cnt must not be more than 256" ));
     138           0 :   int seen_fds[ 256 ] = {0};
     139             : 
     140           0 :   for( ulong i=0UL; i<allowed_file_descriptor_cnt; i++ ) {
     141           0 :     if( allowed_file_descriptor[ i ]<0 || allowed_file_descriptor[ i ]==INT_MAX )
     142           0 :       FD_LOG_ERR(( "allowed_file_descriptors contains invalid file descriptor %d", allowed_file_descriptor[ i ] ));
     143           0 :   }
     144             : 
     145           0 :   for( ulong i=0UL; i<allowed_file_descriptor_cnt; i++ ) {
     146           0 :     for( ulong j=0UL; j<allowed_file_descriptor_cnt; j++ ) {
     147           0 :       if( i==j ) continue;
     148           0 :       if( allowed_file_descriptor[ i ]==allowed_file_descriptor[ j ] )
     149           0 :         FD_LOG_ERR(( "allowed_file_descriptor contains duplicate entry %d", allowed_file_descriptor[ i ] ));
     150           0 :     }
     151           0 :   }
     152             : 
     153           0 :   int dirfd = open( "/proc/self/fd", O_RDONLY | O_DIRECTORY );
     154           0 :   if( dirfd<0 ) FD_LOG_ERR(( "open(/proc/self/fd) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     155             : 
     156           0 :   for(;;) {
     157             :     /* The getdents64() syscall ABI does not require that buf is aligned,
     158             :        since dent->d_name field is variable length, the records are not
     159             :        always aligned and the cast below is going to be unaligned anyway
     160             :        however...
     161             : 
     162             :        If we don't align it the compiler might prove somthing weird and
     163             :        trash this code, and also ASAN would flag it as an error.  So we
     164             :        just align it anyway. */
     165           0 :     uchar buf[ 4096 ] __attribute__((aligned(alignof(struct dirent64))));
     166             : 
     167           0 :     long dents_bytes = syscall( SYS_getdents64, dirfd, buf, sizeof( buf ) );
     168           0 :     if( !dents_bytes ) break;
     169           0 :     else if( -1L==dents_bytes ) FD_LOG_ERR(( "getdents64() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     170             : 
     171           0 :     ulong offset = 0UL;
     172           0 :     while( offset<(ulong)dents_bytes ) {
     173           0 :       struct dirent64 const * dent = (struct dirent64 const *)(buf + offset);
     174           0 :       if( !strcmp( dent->d_name, "." ) || !strcmp( dent->d_name, ".." ) ) {
     175           0 :         offset += dent->d_reclen;
     176           0 :         continue;
     177           0 :       }
     178             : 
     179           0 :       char * end;
     180           0 :       long _fd = strtol( dent->d_name, &end, 10 );
     181           0 :       if( *end != '\0' ) FD_LOG_ERR(( "/proc/self/pid has unrecognized entry name %s", dent->d_name ));
     182           0 :       if( _fd>=INT_MAX ) FD_LOG_ERR(( "/proc/self/pid has file descriptor number %ld which is too large", _fd ));
     183           0 :       int fd = (int)_fd;
     184             : 
     185           0 :       if( fd==dirfd ) {
     186           0 :         offset += dent->d_reclen;
     187           0 :         continue;
     188           0 :       }
     189             : 
     190           0 :       int found = 0;
     191           0 :       for( ulong i=0UL; i<allowed_file_descriptor_cnt; i++ ) {
     192           0 :         if( fd==allowed_file_descriptor[ i ] ) {
     193           0 :           if( seen_fds[ i ] ) FD_LOG_ERR(( "/proc/self/fd contained the same file descriptor (%d) twice", fd ));
     194           0 :           seen_fds[ i ] = 1;
     195           0 :           found = 1;
     196           0 :           break;
     197           0 :         }
     198           0 :       }
     199             : 
     200           0 :       if( !found ) {
     201           0 :         char path[ PATH_MAX ];
     202           0 :         FD_TEST( fd_cstr_printf_check( path, sizeof( path ), NULL, "/proc/self/fd/%d", fd ) );
     203             : 
     204           0 :         char target[ PATH_MAX ];
     205           0 :         long count = readlink( path, target, PATH_MAX );
     206           0 :         if( count<0L        ) FD_LOG_ERR(( "readlink(%s) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
     207           0 :         if( count>=PATH_MAX ) FD_LOG_ERR(( "readlink(%s) returned truncated path", path ));
     208           0 :         target[ count ] = '\0';
     209             : 
     210           0 :         FD_LOG_ERR(( "unexpected file descriptor %d open %s", fd, target ));
     211           0 :       }
     212             : 
     213           0 :       offset += dent->d_reclen;
     214           0 :     }
     215           0 :   }
     216             : 
     217           0 :   for( ulong i=0UL; i<allowed_file_descriptor_cnt; i++ ) {
     218           0 :     if( !seen_fds[ i ] ) FD_LOG_ERR(( "allowed file descriptor %d not present", allowed_file_descriptor[ i ] ));
     219           0 :   }
     220             : 
     221           0 :   if( close( dirfd ) ) FD_LOG_ERR(( "close(/proc/self/fd) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     222           0 : }
     223             : 
     224             : void
     225             : fd_sandbox_private_switch_uid_gid( uint desired_uid,
     226           0 :                                    uint desired_gid ) {
     227             :   /* We do a small hack: in development environments we sometimes want
     228             :      to run all tiles in a single process.  In that case, the sandbox
     229             :      doesn't get created except that we still switch to the desired uid
     230             :      and gid.
     231             : 
     232             :      There's a problem with this: POSIX states that all threads in a
     233             :      process must have the same uid and gid, so glibc does some wacky
     234             :      stuff... from man 2 setresgid
     235             : 
     236             :         C library/kernel differences
     237             :             At the kernel level, user IDs and group IDs are a per-thread
     238             :             attribute.  However, POSIX requires that all threads in a
     239             :             process share the same credentials.  The NPTL threading
     240             :             implementation handles the POSIX requirements by providing
     241             :             wrapper functions for the various system calls that change
     242             :             process UIDs and GIDs.  These  wrap‐ per functions
     243             :             (including those for setresuid() and setresgid()) employ a
     244             :             signal-based technique to ensure that when one thread
     245             :             changes credentials, all of the other threads in the process
     246             :             also change their credentials.  For details, see nptl(7).
     247             : 
     248             :       We know all of our threads in this development case are going to
     249             :       switch to the target uid/gid at their own leisure (they need to
     250             :       so they can do privileged steps before dropping root), so to
     251             :       align this behavior between production and development, we invoke
     252             :       the syscall directly and do not let glibc switch uid/gid on the
     253             :       other threads in the process. */
     254           0 :   int changed = 0;
     255           0 :   gid_t curgid, curegid, cursgid;
     256           0 :   if( -1==getresgid( &curgid, &curegid, &cursgid ) ) FD_LOG_ERR(( "getresgid failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     257           0 :   if( desired_gid!=curgid || desired_gid!=curegid || desired_gid!=cursgid ) {
     258           0 :     if( -1==syscall( __NR_setresgid, desired_gid, desired_gid, desired_gid ) ) FD_LOG_ERR(( "setresgid failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     259           0 :     changed = 1;
     260           0 :   }
     261             : 
     262           0 :   uid_t curuid, cureuid, cursuid;
     263           0 :   if( -1==getresuid( &curuid, &cureuid, &cursuid ) ) FD_LOG_ERR(( "getresuid failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     264           0 :   if( desired_uid!=curuid || desired_uid!=cureuid || desired_uid!=cursuid ) {
     265           0 :     if( -1==syscall( __NR_setresuid, desired_uid, desired_uid, desired_uid ) ) FD_LOG_ERR(( "setresuid failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     266           0 :     changed = 1;
     267           0 :   }
     268             : 
     269             :   /* Calling setresgid/setresuid sets the dumpable bit to 0 which
     270             :      prevents debugging and stops us from setting our uid/gid maps in
     271             :      the user namespace so restore it if it was changed. */
     272           0 :   if( changed ) {
     273           0 :     if( -1==prctl( PR_SET_DUMPABLE, 1 ) ) FD_LOG_ERR(( "prctl(PR_SET_DUMPABLE, 1) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     274           0 :   }
     275           0 : }
     276             : 
     277             : void
     278             : fd_sandbox_private_write_userns_uid_gid_maps( uint uid_in_parent,
     279           0 :                                               uint gid_in_parent ) {
     280           0 :   int setgroups_fd = open( "/proc/self/setgroups", O_WRONLY );
     281           0 :   if( FD_UNLIKELY( setgroups_fd<0 ) )                       FD_LOG_ERR(( "open(/proc/self/setgroups) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     282             : 
     283           0 :   long written = write( setgroups_fd, "deny", strlen( "deny" ) );
     284           0 :   if( FD_UNLIKELY( -1L==written ) )                         FD_LOG_ERR(( "write(/proc/self/setgroups) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     285           0 :   else if( FD_UNLIKELY( written!=(long)strlen( "deny" ) ) ) FD_LOG_ERR(( "write(/proc/self/setgroups) failed to write all data" ));
     286           0 :   if( FD_UNLIKELY( close( setgroups_fd ) ) )                FD_LOG_ERR(( "close(/proc/self/setgroups) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     287             : 
     288           0 :   static char const * MAP_PATHS[] = {
     289           0 :     "/proc/self/uid_map",
     290           0 :     "/proc/self/gid_map",
     291           0 :   };
     292             : 
     293           0 :   uint ids[] = {
     294           0 :     uid_in_parent,
     295           0 :     gid_in_parent
     296           0 :   };
     297             : 
     298           0 :   for( ulong i=0UL; i<2UL; i++ ) {
     299           0 :     int fd = open( MAP_PATHS[ i ], O_WRONLY );
     300           0 :     if( -1==fd )                              FD_LOG_ERR(( "open(%s) failed (%i-%s)", MAP_PATHS[ i ], errno, fd_io_strerror( errno ) ));
     301             : 
     302           0 :     char map_line[ 64 ];
     303           0 :     FD_TEST( fd_cstr_printf_check( map_line, sizeof( map_line ), NULL, "1 %u 1\n", ids[ i ] ) );
     304           0 :     long written = write( fd, map_line, strlen( map_line ) );
     305           0 :     if( -1L==written )                        FD_LOG_ERR(( "write(%s) failed (%i-%s)", MAP_PATHS[ i ], errno, fd_io_strerror( errno ) ));
     306           0 :     if( written != (long)strlen( map_line ) ) FD_LOG_ERR(( "write(%s) failed to write all data", MAP_PATHS[ i ] ));
     307           0 :     if( close( fd ) )                         FD_LOG_ERR(( "close(%s) failed (%i-%s)", MAP_PATHS[ i ], errno, fd_io_strerror( errno ) ));
     308           0 :   }
     309           0 : }
     310             : 
     311             : void
     312           0 : fd_sandbox_private_deny_namespaces( void ) {
     313           0 :   static char const * SYSCTLS[] = {
     314           0 :     "/proc/sys/user/max_user_namespaces",
     315           0 :     "/proc/sys/user/max_mnt_namespaces",
     316           0 :     "/proc/sys/user/max_cgroup_namespaces",
     317           0 :     "/proc/sys/user/max_ipc_namespaces",
     318           0 :     "/proc/sys/user/max_net_namespaces",
     319           0 :     "/proc/sys/user/max_pid_namespaces",
     320           0 :     "/proc/sys/user/max_uts_namespaces",
     321           0 :   };
     322             : 
     323           0 :   static char const * VALUES[] = {
     324           0 :     "1", /* One user namespace is allowed, to created the nested child. */
     325           0 :     "2", /* Two mount namespaces are allowed, the one in the parent user namespace, and the one we will use to pivot the root in the child namespace */
     326           0 :     "0",
     327           0 :     "0",
     328           0 :     "0",
     329           0 :     "0",
     330           0 :     "0",
     331           0 :   };
     332             : 
     333           0 :   for( ulong i=0UL; i<sizeof(SYSCTLS)/sizeof(SYSCTLS[ 0 ]); i++) {
     334           0 :     int fd = open( SYSCTLS[ i ], O_WRONLY );
     335           0 :     if( fd<0 )                       FD_LOG_ERR(( "open(%s) failed (%i-%s)", SYSCTLS[ i ], errno, fd_io_strerror( errno ) ));
     336             : 
     337           0 :     long written = write( fd, VALUES[ i ], 1 );
     338           0 :     if( written==-1 )                FD_LOG_ERR(( "write(%s) failed (%i-%s)", SYSCTLS[ i ], errno, fd_io_strerror( errno ) ));
     339           0 :     else if( written!=1 )            FD_LOG_ERR(( "write(%s) failed to write data", SYSCTLS[ i ] ));
     340           0 :     if( FD_UNLIKELY( close( fd ) ) ) FD_LOG_ERR(( "close(%s) failed (%i-%s)", SYSCTLS[ i ], errno, fd_io_strerror( errno ) ));
     341           0 :   }
     342           0 : }
     343             : 
     344             : void
     345           0 : fd_sandbox_private_pivot_root( void ) {
     346             :   /* The steps taken here to unmount the filesystem and jail us into an
     347             :      empty location look incredibly strange, but are a somewhat standard
     348             :      pattern copied from other sandboxes.  For a couple of examples, see
     349             : 
     350             :         https://github.com/firecracker-microvm/firecracker/blob/main/src/jailer/src/chroot.rs
     351             :         https://github.com/hpc/charliecloud/blob/master/bin/ch-checkns.c
     352             :         https://github.com/opencontainers/runc/blob/HEAD/libcontainer/rootfs_linux.go#L671
     353             :         https://github.com/lxc/lxc/blob/HEAD/src/lxc/conf.c#L1121
     354             :         https://github.com/containers/bubblewrap/blob/main/bubblewrap.c#L3196
     355             : 
     356             :      The core problem is that calling pivot_root(2) will fail if the
     357             :      list of mounts in the namespace is not arranged very carefully. */
     358             : 
     359           0 :   if( -1==unshare( CLONE_NEWNS ) )                                              FD_LOG_ERR(( "unshare(CLONE_NEWNS) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     360             : 
     361           0 :   ulong bytes;
     362           0 :   if( 8UL!=getrandom( &bytes, sizeof( bytes ), 0 ) )                            FD_LOG_ERR(( "getrandom() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     363             : 
     364           0 :   char new_root_path[ PATH_MAX ];
     365           0 :   FD_TEST( fd_cstr_printf_check( new_root_path, sizeof( new_root_path ), NULL, "/tmp/fd_sandbox_%lu", bytes ) );
     366             : 
     367           0 :   if( -1==mkdir( new_root_path, S_IRUSR | S_IWUSR | S_IXUSR ) )                 FD_LOG_ERR(( "mkdir(%s, 0700) failed (%i-%s)", new_root_path, errno, fd_io_strerror( errno ) ));
     368           0 :   if( -1==mount( NULL, "/", NULL, MS_SLAVE | MS_REC, NULL ) )                   FD_LOG_ERR(( "mount(NULL, /, NULL, MS_SLAVE | MS_REC, NULL) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     369           0 :   if( -1==mount( new_root_path, new_root_path, NULL, MS_BIND | MS_REC, NULL ) ) FD_LOG_ERR(( "mount(%s, %s, NULL, MS_BIND | MS_REC, NULL) failed (%i-%s)", new_root_path, new_root_path, errno, fd_io_strerror( errno ) ));
     370           0 :   if( -1==chdir( new_root_path ) )                                              FD_LOG_ERR(( "chdir(%s) failed (%i-%s)", new_root_path, errno, fd_io_strerror( errno ) ));
     371           0 :   if( -1==syscall( SYS_pivot_root, ".", "." ) )                                 FD_LOG_ERR(( "pivot_root(., .) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     372           0 :   if( -1==umount2( ".", MNT_DETACH ) )                                          FD_LOG_ERR(( "umount2(., MNT_DETACH) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     373           0 :   if( -1==chdir( "/" ) )                                                        FD_LOG_ERR(( "chdir(/) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     374           0 : }
     375             : 
     376             : struct rlimit_setting {
     377             : #ifdef __GLIBC__
     378             :   __rlimit_resource_t resource;
     379             : #else /* non-glibc */
     380             :   int resource;
     381             : #endif /* __GLIBC__ */
     382             : 
     383             :   ulong limit;
     384             : };
     385             : 
     386             : void
     387           0 : fd_sandbox_private_set_rlimits( ulong rlimit_file_cnt ) {
     388           0 :   struct rlimit_setting rlimits[] = {
     389           0 :     { .resource=RLIMIT_NOFILE,     .limit=rlimit_file_cnt },
     390             :     /* The man page for setrlimit(2) states about RLIMIT_NICE:
     391             : 
     392             :           The useful range for this limit is thus from 1 (corresponding
     393             :           to a nice value of 19) to 40 (corresponding to a nice value of
     394             :           -20).
     395             : 
     396             :        But this is misleading.  The range of values is from 0 to 40,
     397             :        even though the "useful" range is 1 to 40, because a value of 0
     398             :        and a value of 1 for the rlimit both map to a nice value of 19.
     399             : 
     400             :        But... if you attempt to call setrlimit( RLIMIT_NICE, 1 ) without
     401             :        CAP_SYS_RESOURCE, and the hard limit is already 0, you will get
     402             :        EPERM, so we actually have to set the limit to 0 here, not 1. */
     403           0 :     { .resource=RLIMIT_NICE,       .limit=0UL             },
     404             : 
     405           0 :     { .resource=RLIMIT_AS,         .limit=0UL             },
     406           0 :     { .resource=RLIMIT_CORE,       .limit=0UL             },
     407           0 :     { .resource=RLIMIT_DATA,       .limit=0UL             },
     408           0 :     { .resource=RLIMIT_MEMLOCK,    .limit=0UL             },
     409           0 :     { .resource=RLIMIT_MSGQUEUE,   .limit=0UL             },
     410           0 :     { .resource=RLIMIT_NPROC,      .limit=0UL             },
     411           0 :     { .resource=RLIMIT_RTPRIO,     .limit=0UL             },
     412           0 :     { .resource=RLIMIT_RTTIME,     .limit=0UL             },
     413           0 :     { .resource=RLIMIT_SIGPENDING, .limit=0UL             },
     414           0 :     { .resource=RLIMIT_STACK,      .limit=0UL             },
     415             : 
     416             :     /* Resources that can't be restricted. */
     417             :     // { .resource=RLIMIT_CPU,        .limit=0UL             },
     418             :     // { .resource=RLIMIT_FSIZE,      .limit=0UL             },
     419             : 
     420             :     /* Deprecated resources, not used. */
     421             :     // { .resource=RLIMIT_LOCKS,      .limit=0UL             },
     422             :     // { .resource=RLIMIT_RSS,        .limit=0UL             },
     423           0 :   };
     424             : 
     425           0 :   for( ulong i=0UL; i<sizeof(rlimits)/sizeof(rlimits[ 0 ]); i++ ) {
     426           0 :     struct rlimit limit = { .rlim_cur=rlimits[ i ].limit, .rlim_max=rlimits[ i ].limit };
     427           0 :     if( -1==setrlimit( rlimits[ i ].resource, &limit ) ) FD_LOG_ERR(( "setrlimit(%u) failed (%i-%s)", rlimits[ i ].resource, errno, fd_io_strerror( errno ) ));
     428           0 :   }
     429           0 : }
     430             : 
     431             : void
     432           0 : fd_sandbox_private_drop_caps( ulong cap_last_cap ) {
     433           0 :   if( -1==prctl( PR_SET_SECUREBITS,
     434           0 :                  SECBIT_KEEP_CAPS_LOCKED | SECBIT_NO_SETUID_FIXUP |
     435           0 :                     SECBIT_NO_SETUID_FIXUP_LOCKED | SECBIT_NOROOT |
     436           0 :                     SECBIT_NOROOT_LOCKED | SECBIT_NO_CAP_AMBIENT_RAISE |
     437           0 :                     SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED ) ) FD_LOG_ERR(( "prctl(PR_SET_SECUREBITS) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     438             : 
     439           0 :   for( ulong cap=0UL; cap<=cap_last_cap; cap++ ) {
     440           0 :     if( -1==prctl( PR_CAPBSET_DROP, cap, 0, 0, 0 ) ) FD_LOG_ERR(( "prctl(PR_CAPBSET_DROP) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     441           0 :   }
     442             : 
     443           0 :   struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 };
     444           0 :   struct __user_cap_data_struct   data[2] = { { 0 } };
     445           0 :   if( -1==syscall( SYS_capset, &hdr, data ) )                          FD_LOG_ERR(( "syscall(SYS_capset) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     446           0 :   if( -1==prctl( PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0 ) ) FD_LOG_ERR(( "prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     447           0 : }
     448             : 
     449           0 : #define LANDLOCK_CREATE_RULESET_VERSION (1U << 0)
     450             : 
     451           0 : #define LANDLOCK_ACCESS_FS_EXECUTE      (1ULL << 0)
     452           0 : #define LANDLOCK_ACCESS_FS_WRITE_FILE   (1ULL << 1)
     453           0 : #define LANDLOCK_ACCESS_FS_READ_FILE    (1ULL << 2)
     454           0 : #define LANDLOCK_ACCESS_FS_READ_DIR     (1ULL << 3)
     455           0 : #define LANDLOCK_ACCESS_FS_REMOVE_DIR   (1ULL << 4)
     456           0 : #define LANDLOCK_ACCESS_FS_REMOVE_FILE  (1ULL << 5)
     457           0 : #define LANDLOCK_ACCESS_FS_MAKE_CHAR    (1ULL << 6)
     458           0 : #define LANDLOCK_ACCESS_FS_MAKE_DIR     (1ULL << 7)
     459           0 : #define LANDLOCK_ACCESS_FS_MAKE_REG     (1ULL << 8)
     460           0 : #define LANDLOCK_ACCESS_FS_MAKE_SOCK    (1ULL << 9)
     461           0 : #define LANDLOCK_ACCESS_FS_MAKE_FIFO    (1ULL << 10)
     462           0 : #define LANDLOCK_ACCESS_FS_MAKE_BLOCK   (1ULL << 11)
     463           0 : #define LANDLOCK_ACCESS_FS_MAKE_SYM     (1ULL << 12)
     464           0 : #define LANDLOCK_ACCESS_FS_REFER        (1ULL << 13)
     465           0 : #define LANDLOCK_ACCESS_FS_TRUNCATE     (1ULL << 14)
     466           0 : #define LANDLOCK_ACCESS_FS_IOCTL_DEV    (1ULL << 15)
     467             : 
     468           0 : #define LANDLOCK_ACCESS_NET_BIND_TCP    (1ULL << 0)
     469           0 : #define LANDLOCK_ACCESS_NET_CONNECT_TCP (1ULL << 1)
     470             : 
     471             : struct landlock_ruleset_attr {
     472             :     __u64 handled_access_fs;
     473             :     __u64 handled_access_net;
     474             : };
     475             : 
     476             : void
     477           0 : fd_sandbox_private_landlock_restrict_self( void ) {
     478           0 :   struct landlock_ruleset_attr attr = {
     479           0 :     .handled_access_fs =
     480           0 :       LANDLOCK_ACCESS_FS_EXECUTE |
     481           0 :       LANDLOCK_ACCESS_FS_WRITE_FILE |
     482           0 :       LANDLOCK_ACCESS_FS_READ_FILE |
     483           0 :       LANDLOCK_ACCESS_FS_READ_DIR |
     484           0 :       LANDLOCK_ACCESS_FS_REMOVE_DIR |
     485           0 :       LANDLOCK_ACCESS_FS_REMOVE_FILE |
     486           0 :       LANDLOCK_ACCESS_FS_MAKE_CHAR |
     487           0 :       LANDLOCK_ACCESS_FS_MAKE_DIR |
     488           0 :       LANDLOCK_ACCESS_FS_MAKE_REG |
     489           0 :       LANDLOCK_ACCESS_FS_MAKE_SOCK |
     490           0 :       LANDLOCK_ACCESS_FS_MAKE_FIFO |
     491           0 :       LANDLOCK_ACCESS_FS_MAKE_BLOCK |
     492           0 :       LANDLOCK_ACCESS_FS_MAKE_SYM |
     493           0 :       LANDLOCK_ACCESS_FS_REFER |
     494           0 :       LANDLOCK_ACCESS_FS_TRUNCATE |
     495           0 :       LANDLOCK_ACCESS_FS_IOCTL_DEV,
     496           0 :     .handled_access_net =
     497           0 :       LANDLOCK_ACCESS_NET_BIND_TCP |
     498           0 :       LANDLOCK_ACCESS_NET_CONNECT_TCP,
     499           0 :   };
     500             : 
     501           0 :   long abi = syscall( SYS_landlock_create_ruleset, NULL, 0, LANDLOCK_CREATE_RULESET_VERSION );
     502           0 :   if( -1L==abi && (errno==ENOSYS || errno==EOPNOTSUPP ) ) return;
     503           0 :   else if( -1L==abi ) FD_LOG_ERR(( "landlock_create_ruleset() failed (%i-%s).", errno, fd_io_strerror( errno ) ));
     504             : 
     505           0 :   switch (abi) {
     506           0 :   case 1L:
     507             :       /* Removes LANDLOCK_ACCESS_FS_REFER for ABI < 2 */
     508           0 :       attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_REFER;
     509           0 :       __attribute__((fallthrough));
     510           0 :   case 2L:
     511             :       /* Removes LANDLOCK_ACCESS_FS_TRUNCATE for ABI < 3 */
     512           0 :       attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_TRUNCATE;
     513           0 :       __attribute__((fallthrough));
     514           0 :   case 3L:
     515             :       /* Removes network support for ABI < 4 */
     516           0 :       attr.handled_access_net &=
     517           0 :           ~(LANDLOCK_ACCESS_NET_BIND_TCP |
     518           0 :             LANDLOCK_ACCESS_NET_CONNECT_TCP);
     519           0 :       __attribute__((fallthrough));
     520           0 :   case 4L:
     521             :       /* Removes LANDLOCK_ACCESS_FS_IOCTL_DEV for ABI < 5 */
     522           0 :       attr.handled_access_fs &= ~LANDLOCK_ACCESS_FS_IOCTL_DEV;
     523           0 :   }
     524             : 
     525           0 :   long landlock_fd = syscall( SYS_landlock_create_ruleset, &attr, 16, 0 );
     526           0 :   if( -1L==landlock_fd ) FD_LOG_ERR(( "landlock_create_ruleset() failed (%i-%s).", errno, fd_io_strerror( errno ) ));
     527             : 
     528           0 :   if( syscall( SYS_landlock_restrict_self, landlock_fd, 0 ) ) FD_LOG_ERR(( "landlock_restrict_self() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     529           0 : }
     530             : 
     531             : void
     532             : fd_sandbox_private_set_seccomp_filter( ushort               seccomp_filter_cnt,
     533           0 :                                        struct sock_filter * seccomp_filter ) {
     534           0 :   struct sock_fprog program = {
     535           0 :     .len    = seccomp_filter_cnt,
     536           0 :     .filter = seccomp_filter,
     537           0 :   };
     538             : 
     539           0 :   if( syscall( SYS_seccomp, SECCOMP_SET_MODE_FILTER, 0, &program ) ) FD_LOG_ERR(( "seccomp() failed (%i-%s)", errno, fd_io_strerror( errno ) ) );
     540           0 : }
     541             : 
     542             : ulong
     543           0 : fd_sandbox_private_read_cap_last_cap( void ) {
     544           0 :   int fd = open( "/proc/sys/kernel/cap_last_cap", O_RDONLY );
     545           0 :   if( -1==fd ) FD_LOG_ERR(( "open(/proc/sys/kernel/cap_last_cap) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     546           0 :   char buf[ 16 ] = {0};
     547           0 :   long count = read( fd, buf, sizeof( buf ) );
     548           0 :   if( -1L==count ) FD_LOG_ERR(( "read(/proc/sys/kernel/cap_last_cap) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     549           0 :   if( (ulong)count>=sizeof( buf ) ) FD_LOG_ERR(( "read(/proc/sys/kernel/cap_last_cap) returned truncated data" ));
     550           0 :   if( 0L!=read( fd, buf, sizeof( buf ) ) ) FD_LOG_ERR(( "read(/proc/sys/kernel/cap_last_cap) did not return all the data" ));
     551             : 
     552           0 :   char * end;
     553           0 :   ulong cap_last_cap = strtoul( buf, &end, 10 );
     554           0 :   if( *end!='\n' ) FD_LOG_ERR(( "read(/proc/sys/kernel/cap_last_cap) returned malformed data" ));
     555           0 :   if( close( fd ) ) FD_LOG_ERR(( "close(/proc/sys/kernel/cap_last_cap) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     556           0 :   if( !cap_last_cap || cap_last_cap>128 ) FD_LOG_ERR(( "read(/proc/sys/kernel/cap_last_cap) returned invalid data" ));
     557             : 
     558           0 :   return cap_last_cap;
     559           0 : }
     560             : 
     561             : void
     562             : fd_sandbox_private_enter_no_seccomp( uint        desired_uid,
     563             :                                      uint        desired_gid,
     564             :                                      int         keep_host_networking,
     565             :                                      int         keep_controlling_terminal,
     566             :                                      ulong       rlimit_file_cnt,
     567             :                                      ulong       allowed_file_descriptor_cnt,
     568           0 :                                      int const * allowed_file_descriptor ) {
     569             :   /* Read the highest capability index on the currently running kernel
     570             :      from /proc */
     571           0 :   ulong cap_last_cap = fd_sandbox_private_read_cap_last_cap();
     572             : 
     573             :   /* The ordering here is quite delicate and should be preserved ...
     574             : 
     575             :       | Action                 | Must happen before          | Reason
     576             :       |------------------------|-----------------------------|-------------------------------------
     577             :       | Check file descriptors | Pivot root                  | Requires access to /proc filesystem
     578             :       | Clear groups           | Unshare namespaces          | Cannot call setgroups(2) in user namespace
     579             :       | Unshare namespaces     | Pivot root                  | Pivot root requires CAP_SYS_ADMIN
     580             :       | Pivot root             | Drop caps                   | Requires CAP_SYS_ADMIN
     581             :       | Pivot root             | Landlock                    | Accesses the filesystem
     582             :       | Landlock               | Set resource limits         | Creates a file descriptor
     583             :       | Set resource limits    | Drop caps                   | Requires CAP_SYS_RESOURCE */
     584           0 :   fd_sandbox_private_explicit_clear_environment_variables();
     585           0 :   fd_sandbox_private_check_exact_file_descriptors( allowed_file_descriptor_cnt, allowed_file_descriptor );
     586             : 
     587             :   /* Dropping groups can increase privileges to resources that deny
     588             :      certain groups so don't do that, just check that we have no
     589             :      supplementary group IDs. */
     590           0 :   int getgroups_cnt = getgroups( 0UL, NULL );
     591           0 :   if( -1==getgroups_cnt )                                            FD_LOG_ERR(( "getgroups() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     592           0 :   if( getgroups_cnt>1 )                                              FD_LOG_WARNING(( "getgroups() returned multiple supplementary groups (%d), run `id` to see them. "
     593           0 :                                                                                       "Continuing, but it is suggested to run Firedancer with a sandbox user that has as few permissions as possible.", getgroups_cnt ));
     594             : 
     595             :   /* Replace the session keyring in the process with a new
     596             :      anonymous one, in case the systemd or other launcher
     597             :      provided us with something by mistake. */
     598           0 :   if( -1==syscall( SYS_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL ) ) FD_LOG_ERR(( "syscall(SYS_keyctl) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     599             : 
     600             :   /* Detach from the controlling terminal to prevent TIOCSTI type of
     601             :      escapes.  See https://github.com/containers/bubblewrap/issues/142 */
     602           0 :   if( !keep_controlling_terminal ) {
     603           0 :     if( -1==setsid() )                                               FD_LOG_ERR(( "setsid() failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     604           0 :   }
     605             : 
     606             :   /* Certain Linux kernels are configured to not allow user namespaces
     607             :      from an unprivileged process, since it's a common security exploit
     608             :      vector.  You can still make the namespace if you have CAP_SYS_ADMIN
     609             :      so we need to make sure to carry this through the switch_uid_gid
     610             :      which would drop all capabilities by default. */
     611           0 :   int userns_requires_cap_sys_admin = fd_sandbox_requires_cap_sys_admin( desired_uid, desired_gid );
     612           0 :   if( userns_requires_cap_sys_admin ) {
     613           0 :     if( -1==prctl( PR_SET_KEEPCAPS, 1 ) ) FD_LOG_ERR(( "prctl(PR_SET_KEEPCAPS, 1) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     614           0 :   }
     615           0 :   fd_sandbox_private_switch_uid_gid( desired_uid, desired_gid );
     616             : 
     617             :   /* Now raise CAP_SYS_ADMIN again after we switched UID/GID, if it's
     618             :      required to create the user namespace. */
     619           0 :   if( userns_requires_cap_sys_admin ) {
     620           0 :     struct __user_cap_header_struct capheader;
     621           0 :     capheader.version = _LINUX_CAPABILITY_VERSION_3;
     622           0 :     capheader.pid = 0;
     623           0 :     struct __user_cap_data_struct capdata[2] = { {0} };
     624           0 :     if( -1==syscall( SYS_capget, &capheader, capdata ) ) FD_LOG_ERR(( "syscall(SYS_capget) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     625           0 :     capdata[ CAP_TO_INDEX( CAP_SYS_ADMIN ) ].effective |= CAP_TO_MASK( CAP_SYS_ADMIN );
     626           0 :     if( -1==syscall( SYS_capset, &capheader, capdata ) ) FD_LOG_ERR(( "syscall(SYS_capset) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     627           0 :   }
     628             : 
     629             :   /* Now unshare the user namespace, disallow creating any more
     630             :      namespaces except one child user namespace, and then create the
     631             :      child user namespace so that the sandbox can't undo the change. */
     632           0 :   if( -1==unshare( CLONE_NEWUSER ) ) FD_LOG_ERR(( "unshare(CLONE_NEWUSER) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     633           0 :   fd_sandbox_private_write_userns_uid_gid_maps( desired_uid, desired_gid );
     634             : 
     635             :   /* Unshare everything in the parent user namespace, so that the nested
     636             :      user namespace does not have privileges over them. */
     637           0 :   int flags = CLONE_NEWNS | CLONE_NEWCGROUP | CLONE_NEWIPC | CLONE_NEWUTS;
     638           0 :   if( !keep_host_networking ) flags |= CLONE_NEWNET;
     639             : 
     640           0 :   if( -1==unshare( flags ) ) FD_LOG_ERR(( "unshare(CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWCGROUP | CLONE_NEWIPC | CLONE_NEWUTS) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     641             : 
     642           0 :   fd_sandbox_private_deny_namespaces();
     643             : 
     644           0 :   if( -1==unshare( CLONE_NEWUSER ) ) FD_LOG_ERR(( "unshare(CLONE_NEWUSER) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     645           0 :   fd_sandbox_private_write_userns_uid_gid_maps( 1, 1 );
     646             : 
     647             :   /* PR_SET_KEEPCAPS will already be 0 if we didn't need to raise
     648             :      CAP_SYS_ADMIN, but we always clear it anyway. */
     649           0 :   if( -1==prctl( PR_SET_KEEPCAPS, 0 ) ) FD_LOG_ERR(( "prctl(PR_SET_KEEPCAPS, 0) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     650           0 :   if( -1==prctl( PR_SET_DUMPABLE, 0 ) ) FD_LOG_ERR(( "prctl(PR_SET_DUMPABLE, 0) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     651             : 
     652             :   /* Now remount the filesystem root so no files are accessible any more. */
     653           0 :   fd_sandbox_private_pivot_root();
     654             : 
     655             :   /* Add an empty landlock restriction to further prevent filesystem
     656             :      access. */
     657           0 :   fd_sandbox_private_landlock_restrict_self();
     658             : 
     659             :   /* And trim all the resource limits down to zero. */
     660           0 :   fd_sandbox_private_set_rlimits( rlimit_file_cnt );
     661             : 
     662             :   /* And drop all the capabilities we have in the new user namespace. */
     663           0 :   fd_sandbox_private_drop_caps( cap_last_cap );
     664             : 
     665           0 :   if( -1==prctl( PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0 ) ) FD_LOG_ERR(( "prctl(PR_SET_NO_NEW_PRIVS, 1) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     666           0 : }
     667             : 
     668             : void
     669             : fd_sandbox_enter( uint                 desired_uid,
     670             :                   uint                 desired_gid,
     671             :                   int                  keep_host_networking,
     672             :                   int                  keep_controlling_terminal,
     673             :                   ulong                rlimit_file_cnt,
     674             :                   ulong                allowed_file_descriptor_cnt,
     675             :                   int const *          allowed_file_descriptor,
     676             :                   ulong                seccomp_filter_cnt,
     677           0 :                   struct sock_filter * seccomp_filter ) {
     678           0 :   if( seccomp_filter_cnt>USHORT_MAX ) FD_LOG_ERR(( "seccomp_filter_cnt must not be more than %d", USHORT_MAX ));
     679             : 
     680           0 :   fd_sandbox_private_enter_no_seccomp( desired_uid,
     681           0 :                                        desired_gid,
     682           0 :                                        keep_host_networking,
     683           0 :                                        keep_controlling_terminal,
     684           0 :                                        rlimit_file_cnt,
     685           0 :                                        allowed_file_descriptor_cnt,
     686           0 :                                        allowed_file_descriptor );
     687             : 
     688           0 :   FD_LOG_INFO(( "sandbox: full sandbox is being enabled" )); /* log before seccomp in-case logging not allowed in sandbox */
     689             : 
     690             :   /* Now finally install the seccomp-bpf filter. */
     691           0 :   fd_sandbox_private_set_seccomp_filter( (ushort)seccomp_filter_cnt, seccomp_filter );
     692           0 : }
     693             : 
     694             : void
     695             : fd_sandbox_switch_uid_gid( uint desired_uid,
     696           0 :                            uint desired_gid ) {
     697           0 :   fd_sandbox_private_switch_uid_gid( desired_uid, desired_gid );
     698           0 :   FD_LOG_INFO(( "sandbox: sandbox disabled" ));
     699           0 : }
     700             : 
     701             : ulong
     702           0 : fd_sandbox_getpid( void ) {
     703           0 :   char pid[ 11 ] = {0}; /* 10 characters for INT_MAX, and then a NUL terminator. */
     704           0 :   long count = readlink( "/proc/self", pid, sizeof(pid) );
     705           0 :   if( -1L==count )                FD_LOG_ERR(( "readlink(/proc/self) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     706           0 :   if( (ulong)count>=sizeof(pid) ) FD_LOG_ERR(( "readlink(/proc/self) returned truncated pid" ));
     707             : 
     708           0 :   char * endptr;
     709           0 :   ulong result = strtoul( pid, &endptr, 10 );
     710             :   /* A pid > INT_MAX is malformed, even if we can represent it in the
     711             :      ulong we are returning. */
     712           0 :   if( *endptr!='\0' || result>INT_MAX ) FD_LOG_ERR(( "strtoul(/proc/self) returned invalid pid" ));
     713             : 
     714           0 :   return result;
     715           0 : }
     716             : 
     717             : ulong
     718           0 : fd_sandbox_gettid( void ) {
     719           0 :   char tid[ 27 ] = {0}; /* 10 characters for INT_MAX, twice, + /task/ and then a NUL terminator. */
     720           0 :   long count = readlink( "/proc/thread-self", tid, sizeof(tid) );
     721           0 :   if( count<0L )                  FD_LOG_ERR(( "readlink(/proc/thread-self) failed (%i-%s)", errno, fd_io_strerror( errno ) ));
     722           0 :   if( (ulong)count>=sizeof(tid) ) FD_LOG_ERR(( "readlink(/proc/thread-self) returned truncated tid" ));
     723             : 
     724           0 :   char * taskstr = strchr( tid, '/' );
     725           0 :   if( !taskstr ) FD_LOG_ERR(( "readlink(/proc/thread-self) returned invalid tid" ));
     726           0 :   taskstr++;
     727             : 
     728           0 :   char * task = strchr( taskstr, '/' );
     729           0 :   if( !task ) FD_LOG_ERR(( "readlink(/proc/thread-self) returned invalid tid" ));
     730             : 
     731           0 :   char * endptr;
     732           0 :   ulong result = strtoul( task+1UL, &endptr, 10 );
     733             :   /* A tid > INT_MAX is malformed, even if we can represent it in the
     734             :      ulong we are returning. */
     735           0 :   if( *endptr!='\0' || result>INT_MAX ) FD_LOG_ERR(( "strtoul(/proc/self) returned invalid tid" ));
     736             : 
     737           0 :   return result;
     738           0 : }

Generated by: LCOV version 1.14