LCOV - code coverage report
Current view: top level - util/shmem - fd_numa_linux.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 73 93 78.5 %
Date: 2024-11-13 11:58:15 Functions: 9 10 90.0 %

          Line data    Source code
       1             : /* syscall API requires _GNU_SOURCE */
       2             : #define _GNU_SOURCE
       3             : #include "fd_shmem_private.h"
       4             : #include "../sanitize/fd_msan.h"
       5             : #include <errno.h>
       6             : #include <dirent.h>
       7             : #include <sys/sysinfo.h>
       8             : 
       9             : /* The below uses the sysfs API added ~2009-Dec.  See
      10             :    https://github.com/torvalds/linux/commit/1830794ae6392ce12d36dbcc5ff52f11298ddab6 */
      11             : 
      12             : /* fd_numa_private_parse_node_idx parses a cstr of the form
      13             :    `node[0-9]+` into a node idx.  The value will strictly interpreted as
      14             :    a non-negative base 10 value.  Returns -1 if the value could not be
      15             :    parsed (e.g. s is NULL, s does not have a node prefix, s does not
      16             :    have a base 10 suffix, the value overflows an int representation).
      17             :    FIXME: consider having the user pass the prefix to scan for to allow
      18             :    extracting more general indices from sysfs paths. */
      19             : 
      20             : FD_FN_PURE static int
      21      604065 : fd_numa_private_parse_node_idx( char const * s ) {
      22      604065 :   if( FD_UNLIKELY( !s ) ) return -1;
      23      604065 :   if( FD_UNLIKELY( strncmp( s, "node", 4UL ) ) ) return -1;
      24       75075 :   s += 4;
      25             : 
      26       75075 :   long val = 0L;
      27             : 
      28       75075 :   char const * t = s;
      29      150150 :   for(;;) {
      30      150150 :     char c = *t;
      31      150150 :     if( !c ) break; /* host dep branch prob */
      32       75075 :     if( FD_UNLIKELY( !(('0'<=c) | (c<='9')) ) ) return -1; /* non-digit encountered */
      33       75075 :     val = (long)(c-'0') + 10L*val;
      34       75075 :     if( FD_UNLIKELY( val>(long)INT_MAX ) ) return -1; /* overflow */
      35       75075 :     t++;
      36       75075 :   }
      37       75075 :   if( FD_UNLIKELY( s==t ) ) return -1; /* empty idx */
      38             : 
      39       75075 :   return (int)val;
      40       75075 : }
      41             : 
      42             : ulong
      43        1155 : fd_numa_node_cnt( void ) {
      44             : 
      45             :   /* Open sysfs dir containing NUMA config.  Abort if this fails. */
      46             : 
      47        1155 :   char const * path = "/sys/devices/system/node";
      48        1155 :   DIR *        dir  = opendir( path );
      49        1155 :   if( FD_UNLIKELY( !dir ) ) {
      50           0 :     FD_LOG_WARNING(( "opendir( \"%s\" ) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
      51           0 :     return 0UL;
      52           0 :   }
      53             : 
      54             :   /* Scan dir to get number of NUMA nodes.  Note that we do not assume
      55             :      the system indexes numa nodes contiguously (but it almost certainly
      56             :      does). */
      57             : 
      58        1155 :   int node_idx_max = INT_MIN;
      59       13860 :   for(;;) {
      60       13860 :     struct dirent * dirent = readdir( dir );
      61       13860 :     if( !dirent ) break;
      62       12705 :     node_idx_max = fd_int_max( fd_numa_private_parse_node_idx( dirent->d_name ), node_idx_max );
      63       12705 :   }
      64             : 
      65             :   /* Close dir and return what was found */
      66             : 
      67        1155 :   if( FD_UNLIKELY( closedir( dir ) ) )
      68           0 :     FD_LOG_WARNING(( "closedir( \"%s\" ) failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
      69             : 
      70        1155 :   if( FD_UNLIKELY( node_idx_max<0 ) ) {
      71           0 :     FD_LOG_WARNING(( "No numa nodes found in \"%s\"", path ));
      72           0 :     return 0UL;
      73           0 :   }
      74             : 
      75        1155 :   return ((ulong)node_idx_max) + 1UL;
      76        1155 : }
      77             : 
      78             : ulong
      79        1155 : fd_numa_cpu_cnt( void ) {
      80             : 
      81             :   /* FIXME: Consider using get_nprocs_conf, syscall or sysfs director
      82             :      scan. */
      83             : 
      84        1155 :   int cpu_cnt = get_nprocs();
      85        1155 :   if( FD_UNLIKELY( cpu_cnt<=0 ) ) {
      86           0 :     FD_LOG_WARNING(( "Unexpected return (%i) from get_nprocs", cpu_cnt ));
      87           0 :     return 0UL;
      88           0 :   }
      89             : 
      90        1155 :   return (ulong)cpu_cnt;
      91        1155 : }
      92             : 
      93             : ulong
      94       73920 : fd_numa_node_idx( ulong cpu_idx ) {
      95             : 
      96             :   /* Open sysfs dir containing CPU config.  Abort if this fails. */
      97             : 
      98       73920 :   char  path[64];
      99       73920 :   DIR * dir = opendir( fd_cstr_printf( path, 64UL, NULL, "/sys/devices/system/cpu/cpu%lu", cpu_idx ) );
     100       73920 :   if( FD_UNLIKELY( !dir ) ) {
     101           0 :     FD_LOG_WARNING(( "opendir( \"%s\" ) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
     102           0 :     return ULONG_MAX;
     103           0 :   }
     104             : 
     105             :   /* Scan dir for symlink to numa config */
     106             : 
     107       73920 :   int node_idx = -1;
     108      591360 :   for(;;) {
     109      591360 :     struct dirent * dirent = readdir( dir );
     110      591360 :     if( !dirent ) break;
     111      591360 :     node_idx = fd_numa_private_parse_node_idx( dirent->d_name );
     112      591360 :     if( node_idx!=-1 ) break;
     113      591360 :   }
     114             : 
     115             :   /* Close dir and return what was found */
     116             : 
     117       73920 :   if( FD_UNLIKELY( closedir( dir ) ) )
     118           0 :     FD_LOG_WARNING(( "closedir( \"%s\" ) failed (%i-%s); attempting to continue", path, errno, fd_io_strerror( errno ) ));
     119             : 
     120       73920 :   if( FD_UNLIKELY( node_idx<0 ) ) {
     121           0 :     FD_LOG_WARNING(( "No numa node found in \"%s\"", path ));
     122           0 :     return ULONG_MAX;
     123           0 :   }
     124             : 
     125       73920 :   return (ulong)node_idx;
     126       73920 : }
     127             : 
     128             : /* FIXME: probably should do a FD_HAS_ASAN switch for the below to use
     129             :    the appropriate functionality when FD_HAS_ASAN is set (or maybe have
     130             :    a separate implementation for compiling under FD_HAS_ASAN). */
     131             : 
     132             : #include <unistd.h>
     133             : #include <sys/syscall.h>
     134             : 
     135             : /* Note that the LLVM AddressSanitizer (ASan) intercepts all mlock
     136             :    calls.
     137             : 
     138             :    This has an interesting history.  These interceptors were first added
     139             :    in 2012 and are still present in LLVM 14.0.6:
     140             : 
     141             :      https://github.com/llvm/llvm-project/commit/71d759d392f03025bcc8b20f060bc5c22e580ea1
     142             : 
     143             :    They stub `mlock`, `munlock`, `mlockall`, `munlockall` to no-ops.
     144             : 
     145             :    ASan is known to map large amounts (~16TiB) of unbacked pages.  This
     146             :    rules out the use of `mlockall`.
     147             : 
     148             :    `mlock` only locks selected pages, therefore should be fine.  The
     149             :    comments in various revisions of these interceptors suggest that
     150             :    older Linux kernels had a bug that prevented the use of `mlock`.
     151             : 
     152             :    However, current Firedancer will use the `move_pages` syscall to
     153             :    verify whether "allocated" pages are actually backed by DRAM.
     154             : 
     155             :    This makes Firedancer and ASan incompatible unless we either
     156             : 
     157             :      1) Remove the `mlock` interceptor upstream, or
     158             :      2) Circumvent the interceptor with a raw syscall
     159             : 
     160             :    We do option 2 below */
     161             : 
     162             : int
     163             : fd_numa_mlock( void const * addr,
     164        1587 :                ulong        len ) {
     165        1587 :   return (int)syscall( SYS_mlock, addr, len );
     166        1587 : }
     167             : 
     168             : int
     169             : fd_numa_munlock( void const * addr,
     170           0 :                  ulong        len ) {
     171           0 :   return (int)syscall( SYS_mlock, addr, len );
     172           0 : }
     173             : 
     174             : long
     175             : fd_numa_get_mempolicy( int *   mode,
     176             :                        ulong * nodemask,
     177             :                        ulong   maxnode,
     178             :                        void *  addr,
     179         546 :                        uint    flags ) {
     180         546 :   long rc = syscall( SYS_get_mempolicy, mode, nodemask, maxnode, addr, flags );
     181         546 :   if( rc==0 ) {
     182         546 :     if( mode     ) fd_msan_unpoison( mode, sizeof(int) );
     183         546 :     if( nodemask ) fd_msan_unpoison( nodemask, 8UL*((maxnode+63UL)/64UL) );
     184         546 :   }
     185         546 :   return rc;
     186         546 : }
     187             : 
     188             : long
     189             : fd_numa_set_mempolicy( int           mode,
     190             :                        ulong const * nodemask,
     191        1071 :                        ulong         maxnode ) {
     192        1071 :   return syscall( SYS_set_mempolicy, mode, nodemask, maxnode );
     193        1071 : }
     194             : 
     195             : long
     196             : fd_numa_mbind( void *        addr,
     197             :                ulong         len,
     198             :                int           mode,
     199             :                ulong const * nodemask,
     200             :                ulong         maxnode,
     201         525 :                uint          flags ) {
     202         525 :   return syscall( SYS_mbind, addr, len, mode, nodemask, maxnode, flags );
     203         525 : }
     204             : 
     205             : long
     206             : fd_numa_move_pages( int         pid,
     207             :                     ulong       count,
     208             :                     void **     pages,
     209             :                     int const * nodes,
     210             :                     int *       status,
     211      126354 :                     int         flags ) {
     212      126354 :   long rc = syscall( SYS_move_pages, pid, count, pages, nodes, status, flags );
     213      126354 :   if( rc==0 ) fd_msan_unpoison( status, count*sizeof(int) );
     214      126354 :   return rc;
     215      126354 : }

Generated by: LCOV version 1.14