LCOV - code coverage report
Current view: top level - util/scratch - fd_scratch.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 111 177 62.7 %
Date: 2025-03-20 12:08:36 Functions: 64 22656 0.3 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_scratch_fd_scratch_h
       2             : #define HEADER_fd_src_util_scratch_fd_scratch_h
       3             : 
       4             : /* APIs for high performance scratch pad memory allocation.  There
       5             :    are two allocators provided.  One is fd_alloca, which is an alignment
       6             :    aware equivalent of alloca.  It is meant for use anywhere alloca
       7             :    would normally be used.  This is only available if the built target
       8             :    has the FD_HAS_ALLOCA capability.  The second is fd_scratch_alloc.
       9             :    It is meant for use in situations that have very complex and large
      10             :    temporary memory usage. */
      11             : 
      12             : #include "../sanitize/fd_sanitize.h"
      13             : #include "../tile/fd_tile.h"
      14             : #include "../valloc/fd_valloc.h"
      15             : 
      16             : /* FD_SCRATCH_USE_HANDHOLDING:  Define this to non-zero at compile time
      17             :    to turn on additional run-time checks. */
      18             : 
      19             : #ifndef FD_SCRATCH_USE_HANDHOLDING
      20             : #if FD_HAS_DEEPASAN
      21             : #define FD_SCRATCH_USE_HANDHOLDING 1
      22             : #else
      23             : #define FD_SCRATCH_USE_HANDHOLDING 0
      24             : #endif
      25             : #endif
      26             : 
      27             : /* FD_SCRATCH_ALLOC_ALIGN_DEFAULT is the default alignment to use for
      28             :    allocations.
      29             : 
      30             :    Default should be at least 16 for consistent cross platform behavior
      31             :    that is language conformant across a wide range of targets (i.e. the
      32             :    largest primitive type across all possible build ... practically
      33             :    sizeof(int128)).  This also naturally covers SSE natural alignment on
      34             :    x86.  8 could be used if features like int128 and so forth are not
      35             :    needed and still be linguistically conformant (sizeof(ulong) is the limit).
      36             :    Likewise, 32, 64, 128 could be used to guarantee all allocations will
      37             :    have natural AVX/AVX2, natural AVX-512 / cache-line,
      38             :    adjacent-cache-line-prefetch false sharing avoidance / natural GPU
      39             :    alignment properties.
      40             : 
      41             :    128 for default was picked as double x86 cache line for ACLPF false
      42             :    sharing avoidance and for consistency with GPU warp sizes ... i.e.
      43             :    the default allocation behaviors are naturally interthread
      44             :    communication false sharing resistant and GPU friendly.  This also
      45             :    naturally covers cases like SSE, AVX, AVX2 and AVX-512. */
      46             : 
      47    14950955 : #define FD_SCRATCH_ALIGN_DEFAULT (128UL) /* integer power-of-2 >=16 */
      48             : 
      49             : /* FD_SCRATCH_{SMEM,FMEM}_ALIGN give the alignment requirements for
      50             :    the memory regions used to a scratch pad memory.  There are not many
      51             :    restrictions on the SMEM alignment practically other than it be a
      52             :    reasonable integer power of two.  128 was picked to harmonize with
      53             :    FD_SCRATCH_ALIGN_DEFAULT (which does have more technical motivations
      54             :    behind its choice) but this is not strictly required.
      55             :    FD_SCRATCH_FMEM_ALIGN is required to be sizeof(ulong). */
      56             : 
      57       49155 : #define FD_SCRATCH_SMEM_ALIGN (128UL) /* integer power-of-2, harmonized with ALIGN_DEFAULT */
      58             : #define FD_SCRATCH_FMEM_ALIGN   (8UL) /* ==sizeof(ulong) but avoids bugs with some compilers */
      59             : 
      60             : FD_PROTOTYPES_BEGIN
      61             : 
      62             : /* Private APIs *******************************************************/
      63             : 
      64             : #if FD_SCRATCH_USE_HANDHOLDING
      65             : extern FD_TL int     fd_scratch_in_prepare;
      66             : #endif
      67             : 
      68             : extern FD_TL ulong   fd_scratch_private_start;
      69             : extern FD_TL ulong   fd_scratch_private_free;
      70             : extern FD_TL ulong   fd_scratch_private_stop;
      71             : 
      72             : extern FD_TL ulong * fd_scratch_private_frame;
      73             : extern FD_TL ulong   fd_scratch_private_frame_cnt;
      74             : extern FD_TL ulong   fd_scratch_private_frame_max;
      75             : 
      76             : FD_FN_CONST static inline int
      77     2596157 : fd_scratch_private_align_is_valid( ulong align ) {
      78     2596157 :   return !(align & (align-1UL)); /* returns true if power or 2 or zero, compile time typically */
      79     2596157 : }
      80             : 
      81             : FD_FN_CONST static inline ulong
      82    14193345 : fd_scratch_private_true_align( ulong align ) {
      83    14193345 :   return fd_ulong_if( !align, FD_SCRATCH_ALIGN_DEFAULT, align ); /* compile time typically */
      84    14193345 : }
      85             : 
      86             : /* Public APIs ********************************************************/
      87             : 
      88             : /* Constructor APIs */
      89             : 
      90             : /* fd_scratch_smem_{align,footprint} return the alignment and footprint
      91             :    of a memory region suitable for use as a scratch pad memory that can
      92             :    hold up to smax bytes.  There are very few restrictions on the nature
      93             :    of this memory.  It could even be just a flat address space that is
      94             :    not backed by an actual physical memory as far as scratch is
      95             :    concerned.  In typical use cases though, the scratch pad memory
      96             :    should point to a region of huge or gigantic page backed memory on
      97             :    the caller's numa node.
      98             : 
      99             :    A shared memory region for smem is fine for smem.  This could be used
     100             :    for example to allow other threads / processes to access a scratch
     101             :    allocation from this thread for the lifetime of a scratch allocation.
     102             : 
     103             :    Even more generally, a shared memory region for both smem and fmem
      104             :    could make it theoretically possible to have a scratch pad memory
     105             :    that is shared across multiple threads / processes.  The API is not
     106             :    well designed for such though (the main reason to use fmem in shared
     107             :    memory would be convenience and/or adding hot swapping
     108             :    functionality).  In the common scratch scenario, every thread would
     109             :    attach to their local join of the shared smem and shared fmem.  But
     110             :    since the operations below are not designed to be thread safe, the
     111             :    threads would have to protect against concurrent use of push and pop
     112             :    (and attach would probably need to be tweaked to make it easier to
     113             :    attach to an already in use scratch pad).
     114             : 
     115             :    Compile time allocation is possible via the FD_SCRATCH_SMEM_ALIGN
     116             :    define.  E.g.:
     117             : 
     118             :      uchar my_smem[ MY_SMAX ] __attribute__((aligned(FD_SCRATCH_SMEM_ALIGN)));
     119             : 
     120             :    will be valid to use as a scratch smem with space for up to MY_SMAX
     121             :    bytes. */
     122             : 
     123           0 : FD_FN_CONST static inline ulong fd_scratch_smem_align( void ) { return FD_SCRATCH_SMEM_ALIGN; }
     124             : 
     125             : FD_FN_CONST static inline ulong
     126       49155 : fd_scratch_smem_footprint( ulong smax ) {
     127       49155 :   return fd_ulong_align_up( smax, FD_SCRATCH_SMEM_ALIGN );
     128       49155 : }
     129             : 
     130             : /* fd_scratch_fmem_{align,footprint} return the alignment and footprint
     131             :    of a memory region suitable for holding the scratch pad memory
     132             :    metadata (typically very small).  The scratch pad memory will be
     133             :    capable of holding up to depth scratch frames.
     134             : 
     135             :    Compile time allocation is possible via the FD_SCRATCH_FMEM_ALIGN
     136             :    define.  E.g.
     137             : 
     138             :      ulong my_fmem[ MY_DEPTH ] __attribute((aligned(FD_SCRATCH_FMEM_ALIGN)));
     139             : 
     140             :    or, even simpler:
     141             : 
     142             :      ulong my_fmem[ MY_DEPTH ];
     143             : 
     144             :    will be valid to use as a scratch fmem with space for up to depth
     145             :    frames.  The attribute variant is not strictly necessary, just for
     146             :    consistency with the smem above (where it is required). */
     147             : 
     148           0 : FD_FN_CONST static inline ulong fd_scratch_fmem_align    ( void        ) { return sizeof(ulong);       }
     149          51 : FD_FN_CONST static inline ulong fd_scratch_fmem_footprint( ulong depth ) { return sizeof(ulong)*depth; }
     150             : 
     151             : /* fd_scratch_attach attaches the calling thread to memory regions
     152             :    sufficient to hold up to smax (positive) bytes and with up to depth
     153             :    (positive) frames.  smem/fmem should have the required alignment and
     154             :    footprint specified for smax/depth from the above and be non-NULL).
     155             :    The caller has a read/write interest in these regions while attached
     156             :    (and thus the local lifetime of these regions must cover the lifetime
     157             :    of the attachment).  Only one scratch pad memory may be attached to a
     158             :    caller at a time.  This cannot fail from the caller's point of view
     159             :    (if handholding is enabled, it will abort the caller with a
     160             :    descriptive error message if used obviously in error). */
     161             : 
     162             : static inline void
     163             : fd_scratch_attach( void * smem,
     164             :                    void * fmem,
     165             :                    ulong  smax,
     166          54 :                    ulong  depth ) {
     167             : 
     168             : # if FD_SCRATCH_USE_HANDHOLDING
     169           0 :   if( FD_UNLIKELY( fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "already attached" ));
     170           0 :   if( FD_UNLIKELY( !smem                        ) ) FD_LOG_ERR(( "bad smem"  ));
     171           0 :   if( FD_UNLIKELY( !fmem                        ) ) FD_LOG_ERR(( "bad fmem"  ));
     172           0 :   if( FD_UNLIKELY( !smax                        ) ) FD_LOG_ERR(( "bad smax"  ));
     173           0 :   if( FD_UNLIKELY( !depth                       ) ) FD_LOG_ERR(( "bad depth" ));
     174           0 :   fd_scratch_in_prepare = 0;
     175           0 : # endif
     176             : 
     177           0 :   fd_scratch_private_start     = (ulong)smem;
     178           0 :   fd_scratch_private_free      = fd_scratch_private_start;
     179           0 :   fd_scratch_private_stop      = fd_scratch_private_start + smax;
     180             : 
     181           0 :   fd_scratch_private_frame     = (ulong *)fmem;
     182           0 :   fd_scratch_private_frame_cnt = 0UL;
     183           0 :   fd_scratch_private_frame_max = depth;
     184             : 
     185             : # if FD_HAS_DEEPASAN
     186             :   /* Poison the entire smem region. Underpoison the boundaries to respect
     187             :      alignment requirements. */
     188             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
     189             :   ulong aligned_end   = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     190             :   fd_asan_poison( (void*)aligned_start, aligned_end - aligned_start );
     191             : # endif
     192             : #if FD_HAS_MSAN
     193             :   /* Mark the entire smem region as uninitialized. */
     194             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_MSAN_ALIGN );
     195             :   ulong aligned_end   = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
     196             :   fd_msan_poison( (void*)aligned_start, aligned_end - aligned_start );
     197             : #endif
     198           0 : }
     199             : 
     200             : /* fd_scratch_detach detaches the calling thread from its current
     201             :    attachment.  Returns smem used on attach and, if opt_fmem is
     202             :    non-NULL, opt_fmem[0] will contain the fmem used on attach on return.
     203             : 
     204             :    This relinquishes the calling threads read/write interest on these
     205             :    memory regions.  All the caller's scratch frames are popped, any
     206             :    prepare in progress is canceled and all the caller's scratch
     207             :    allocations are freed implicitly by this.
     208             : 
     209             :    This cannot fail from the caller's point of view (if handholding is
     210             :    enabled, it will abort the caller with a descriptive error message if
     211             :    used obviously in error). */
     212             : 
     213             : static inline void *
     214          49 : fd_scratch_detach( void ** _opt_fmem ) {
     215             : 
     216             : # if FD_SCRATCH_USE_HANDHOLDING
     217           0 :   if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
     218           0 :   fd_scratch_in_prepare = 0;
     219           0 : # endif
     220             : 
     221             : # if FD_HAS_DEEPASAN
     222             :   /* Unpoison the entire scratch space. There should now be an underlying
     223             :      allocation which has not been poisoned. */
     224             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
     225             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     226             :   fd_asan_unpoison( (void*)aligned_start, aligned_stop - aligned_start );
     227             : # endif
     228             : 
     229           0 :   void * smem = (void *)fd_scratch_private_start;
     230           0 :   void * fmem = (void *)fd_scratch_private_frame;
     231             : 
     232           0 :   fd_scratch_private_start     = 0UL;
     233           0 :   fd_scratch_private_free      = 0UL;
     234           0 :   fd_scratch_private_stop      = 0UL;
     235             : 
     236           0 :   fd_scratch_private_frame     = NULL;
     237           0 :   fd_scratch_private_frame_cnt = 0UL;
     238           0 :   fd_scratch_private_frame_max = 0UL;
     239             : 
     240          49 :   if( _opt_fmem ) _opt_fmem[0] = fmem;
     241           0 :   return smem;
     242          49 : }
     243             : 
     244             : /* User APIs */
     245             : 
     246             : /* fd_scratch_{used,free} returns the number of bytes used/free in the
     247             :    caller's scratch.  Returns 0 if not attached.  Because of alignment
     248             :    overheads, an allocation is guaranteed to succeed if free>=sz+align-1
     249             :    where align is the actual alignment required for the allocation (e.g.
     250             :    align==0 -> default, align<min -> min).  It is guaranteed to fail if
     251             :    free<sz.  It might succeed or fail in between depending on the
      252             :    alignments of previous allocations.  These are freaky fast (O(3)
     253             :    fast asm operations under the hood). */
     254             : 
     255           9 : static inline ulong fd_scratch_used( void ) { return fd_scratch_private_free - fd_scratch_private_start; }
     256           9 : static inline ulong fd_scratch_free( void ) { return fd_scratch_private_stop - fd_scratch_private_free;  }
     257             : 
     258             : /* fd_scratch_frame_{used,free} returns the number of scratch frames
     259             :    used/free in the caller's scratch.  Returns 0 if not attached.  push
     260             :    is guaranteed to succeed if free is non-zero and guaranteed to fail
     261             :    otherwise.  pop is guaranteed to succeed if used is non-zero and
     262             :    guaranteed to fail otherwise.  These are freaky fast (O(1-3) fast asm
     263             :    operations under the hood). */
     264             : 
     265     2954118 : static inline ulong fd_scratch_frame_used( void ) { return fd_scratch_private_frame_cnt; }
     266     2999377 : static inline ulong fd_scratch_frame_free( void ) { return fd_scratch_private_frame_max - fd_scratch_private_frame_cnt; }
     267             : 
     268             : /* fd_scratch_reset frees all allocations (if any) and pops all scratch
     269             :    frames (if any) such that the caller's scratch will be in the same
     270             :    state it was immediately after attach.  The caller must be attached
     271             :    to a scratch memory to use.  This cannot fail from the caller's point
     272             :    of view (if handholding is enabled, it will abort the caller with a
     273             :    descriptive error message if used obviously in error).  This is
     274             :    freaky fast (O(3) fast asm operations under the hood). */
     275             : 
     276             : static inline void
     277         734 : fd_scratch_reset( void ) {
     278             : # if FD_SCRATCH_USE_HANDHOLDING
     279             :   if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
     280             :   fd_scratch_in_prepare = 0;
     281             : # endif
     282         734 :   fd_scratch_private_free      = fd_scratch_private_start;
     283         734 :   fd_scratch_private_frame_cnt = 0UL;
     284             : 
     285             : /* Poison entire scratch space again. */
     286             : # if FD_HAS_DEEPASAN
     287             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
     288             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     289             :   fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
     290             : # endif
     291             : # if FD_HAS_MSAN
     292             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_MSAN_ALIGN );
     293             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
     294             :   fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
     295             : # endif
     296         734 : }
     297             : 
     298             : /* fd_scratch_push creates a new scratch frame and makes it the current
     299             :    frame.  Assumes caller is attached to a scratch with space for a new
     300             :    frame.  This cannot fail from the caller's point of view (if
     301             :    handholding is enabled, it will abort the caller with a descriptive
     302             :    error message if used obviously in error).  This is freaky fast (O(5)
     303             :    fast asm operations under the hood). */
     304             : 
     305             : FD_FN_UNUSED static void /* Work around -Winline */
     306    13355427 : fd_scratch_push( void ) {
     307             : # if FD_SCRATCH_USE_HANDHOLDING
     308          24 :   if( FD_UNLIKELY( !fd_scratch_private_frame_max                              ) ) {
     309           0 :     FD_LOG_ERR(( "not attached" ));
     310           0 :   }
     311          24 :   if( FD_UNLIKELY( fd_scratch_private_frame_cnt>=fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "too many frames" ));
     312          24 :   fd_scratch_in_prepare = 0;
     313          24 : # endif
     314          24 :   fd_scratch_private_frame[ fd_scratch_private_frame_cnt++ ] = fd_scratch_private_free;
     315             : 
     316             :   /* Poison to end of scratch region to account for case of in-prep allocation
     317             :      getting implictly cancelled. */
     318             : # if FD_HAS_DEEPASAN
     319             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_ASAN_ALIGN );
     320             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     321             :   fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
     322             : # endif
     323             : #if FD_HAS_MSAN
     324             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_MSAN_ALIGN );
     325             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
     326             :   fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
     327             : #endif
     328          24 : }
     329             : 
     330             : /* fd_scratch_pop frees all allocations in the current scratch frame,
     331             :    destroys the current scratch frame and makes the previous frame (if
     332             :    there is one) the current stack frame (and leaves the caller without
     333             :    a current frame if there is not one).  Assumes the caller is attached
     334             :    to a scratch memory with at least one frame in use.  This cannot fail
     335             :    from the caller's point of view (if handholding is enabled, it will
     336             :    abort the caller with a descriptive error message if used obviously
     337             :    in error).  This is freaky fast (O(5) fast asm operations under the
     338             :    hood). */
     339             : 
     340             : FD_FN_UNUSED static void /* Work around -Winline */
     341    13998302 : fd_scratch_pop( void ) {
     342             : # if FD_SCRATCH_USE_HANDHOLDING
     343          24 :   if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
     344          24 :   if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched pop" ));
     345          24 :   fd_scratch_in_prepare = 0;
     346          24 : # endif
     347          24 :   fd_scratch_private_free = fd_scratch_private_frame[ --fd_scratch_private_frame_cnt ];
     348             : 
     349             : # if FD_HAS_DEEPASAN
     350             :   /* On a pop() operation, the entire range from fd_scratch_private_free to the
     351             :      end of the scratch space can be safely poisoned. The region must be aligned
     352             :      to accomodate asan manual poisoning requirements. */
     353             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_ASAN_ALIGN );
     354             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     355             :   fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
     356             : # endif
     357             : #if FD_HAS_MSAN
     358             :   ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_MSAN_ALIGN );
     359             :   ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
     360             :   fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
     361             : #endif
     362          24 : }
     363             : 
     364             : /* fd_scratch_prepare starts an allocation of unknown size and known
     365             :    alignment align (0 means use default alignment) in the caller's
     366             :    current scratch frame.  Returns a pointer in the caller's address
     367             :    space with alignment align to the first byte of a region with
     368             :    fd_scratch_free() (as observed after this function returns) bytes
     369             :    available.  The caller is free to clobber any bytes in this region.
     370             : 
     371             :    fd_scratch_publish finishes an in-progress allocation.  end points at
     372             :    the first byte after the final allocation.  Assumes there is a
     373             :    matching prepare.  A published allocation can be subsequently
     374             :    trimmed.
     375             : 
     376             :    fd_scratch_cancel cancels an in-progress allocation.  This is a no-op
     377             :    if there is no matching prepare.  If the prepare had alignment other
     378             :    than 1, it is possible that some alignment padding needed for the
     379             :    allocation will still be used in the caller's current scratch frame.
     380             :    If this is not acceptable, the prepare should use an alignment of 1
     381             :    and manually align the return.
     382             : 
     383             :    This allows idioms like:
     384             : 
     385             :      uchar * p = (uchar *)fd_scratch_prepare( align );
     386             : 
     387             :      if( FD_UNLIKELY( fd_scratch_free() < app_max_sz ) ) {
     388             : 
     389             :        fd_scratch_cancel();
     390             : 
     391             :        ... handle too little scratch space to handle application
     392             :        ... worst case needs here
     393             : 
     394             :      } else {
     395             : 
     396             :        ... populate sz bytes to p where sz is in [0,app_max_sz]
     397             :        p += sz;
     398             : 
     399             :        fd_scratch_publish( p );
     400             : 
     401             :        ... at this point, scratch is as though
     402             :        ... fd_scratch_alloc( align, sz ) was called above
     403             : 
     404             :      }
     405             : 
     406             :    Ideally every prepare should be matched with a publish or a cancel,
     407             :    only one prepare can be in-progress at a time on a thread and prepares
     408             :    cannot be nested.  As such virtually all other scratch operations
     409             :    will implicitly cancel any in-progress prepare, including attach /
     410             :    detach / push / pop / prepare / alloc / trim. */
     411             : 
     412             : FD_FN_UNUSED static void * /* Work around -Winline */
     413    11595905 : fd_scratch_prepare( ulong align ) {
     414             : 
     415             : # if FD_SCRATCH_USE_HANDHOLDING
     416          48 :   if( FD_UNLIKELY( !fd_scratch_private_frame_cnt               ) ) FD_LOG_ERR(( "unmatched push" ));
     417          48 :   if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) FD_LOG_ERR(( "bad align (%lu)", align ));
     418          48 : # endif
     419             : 
     420             : # if FD_HAS_DEEPASAN
     421             :   /* Need 8 byte alignment. */
     422             :   align            = fd_ulong_align_up( align, FD_ASAN_ALIGN );
     423             : # endif
     424          48 :   ulong true_align = fd_scratch_private_true_align( align );
     425          48 :   ulong smem       = fd_ulong_align_up( fd_scratch_private_free, true_align );
     426             : 
     427             : # if FD_SCRATCH_USE_HANDHOLDING
     428          48 :   if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) FD_LOG_ERR(( "prepare align (%lu) overflow", true_align ));
     429          48 :   if( FD_UNLIKELY( smem > fd_scratch_private_stop ) ) FD_LOG_ERR(( "prepare align (%lu) needs %lu additional scratch",
     430          48 :                                                                    align, smem - fd_scratch_private_stop ));
     431          48 :   fd_scratch_in_prepare = 1;
     432          48 : # endif
     433             : 
     434             : # if FD_HAS_DEEPASAN
     435             :   /* At this point the user is able to clobber any bytes in the region. smem is
     436             :      always going to be at least 8 byte aligned. */
     437             :   ulong aligned_sz = fd_ulong_align_up( fd_scratch_private_stop - smem, FD_ASAN_ALIGN );
     438             :   fd_asan_unpoison( (void*)smem, aligned_sz );
     439             : # endif
     440             : 
     441          48 :   fd_scratch_private_free = smem;
     442          48 :   return (void *)smem;
     443          48 : }
     444             : 
     445             : static inline void
     446    11370359 : fd_scratch_publish( void * _end ) {
     447    11370359 :   ulong end = (ulong)_end;
     448             : 
     449             : # if FD_SCRATCH_USE_HANDHOLDING
     450          48 :   if( FD_UNLIKELY( !fd_scratch_in_prepare        ) ) FD_LOG_ERR(( "unmatched prepare" ));
     451          48 :   if( FD_UNLIKELY( end < fd_scratch_private_free ) ) FD_LOG_ERR(( "publish underflow" ));
     452          48 :   if( FD_UNLIKELY( end > fd_scratch_private_stop ) )
     453           0 :     FD_LOG_ERR(( "publish needs %lu additional scratch", end-fd_scratch_private_stop ));
     454          48 :   fd_scratch_in_prepare   = 0;
     455          48 : # endif
     456             : 
     457             :   /* Poison everything that is trimmed off. Conservatively poison potentially
     458             :      less than the region that is trimmed to respect alignment requirements. */
     459             : # if FD_HAS_DEEPASAN
     460             :   ulong aligned_free = fd_ulong_align_dn( fd_scratch_private_free, FD_ASAN_ALIGN );
     461             :   ulong aligned_end  = fd_ulong_align_up( end, FD_ASAN_ALIGN );
     462             :   ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     463             :   fd_asan_poison( (void*)aligned_end, aligned_stop - aligned_end );
     464             :   fd_asan_unpoison( (void*)aligned_free, aligned_end - aligned_free );
     465             : # endif
     466             : # if FD_HAS_MSAN
     467             :   ulong aligned_free = fd_ulong_align_dn( fd_scratch_private_free, FD_ASAN_ALIGN );
     468             :   ulong aligned_end  = fd_ulong_align_up( end, FD_ASAN_ALIGN );
     469             :   ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
     470             :   fd_msan_poison( (void*)aligned_end, aligned_stop - aligned_end );
     471             :   fd_msan_unpoison( (void*)aligned_free, aligned_end - aligned_free );
     472             : # endif
     473             : 
     474          48 :   fd_scratch_private_free = end;
     475          48 : }
     476             : 
/* fd_scratch_cancel abandons the allocation whose prepare is in
   progress without publishing anything.  Outside handholding builds
   this is a no-op: fd_scratch_private_free was never advanced by the
   prepare, so the prepared bytes are simply reused by the next
   prepare / alloc. */
static inline void
fd_scratch_cancel( void ) {

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_in_prepare ) ) FD_LOG_ERR(( "unmatched prepare" ));
  fd_scratch_in_prepare = 0;
# endif

}
     486             : 
     487             : /* fd_scratch_alloc allocates sz bytes with alignment align in the
     488             :    caller's current scratch frame.  There should be no prepare in
     489             :    progress.  Note that this has same function signature as
     490             :    aligned_alloc (and not by accident).  It does have some less
     491             :    restrictive behaviors though.
     492             : 
     493             :    align must be 0 or an integer power of 2.  0 will be treated as
     494             :    FD_SCRATCH_ALIGN_DEFAULT.
     495             : 
     496             :    sz need not be a multiple of align.  Further, the underlying
     497             :    allocator does not implicitly round up sz to an align multiple (as
     498             :    such, scratch can allocate additional items in any tail padding that
     499             :    might have been implicitly reserved had it rounded up).  That is, if
     500             :    you really want to round up allocations to a multiple of align, then
     501             :    manually align up sz ... e.g. pass fd_ulong_align_up(sz,align) when
     502             :    align is non-zero to this call (this could be implemented as a
     503             :    compile time mode with some small extra overhead if desirable).
     504             : 
     505             :    sz 0 is fine.  This will currently return a properly aligned non-NULL
     506             :    pointer (the allocator might do some allocation under the hood to get
     507             :    the desired alignment and it is possible this might fail ... there is
     508             :    a case for returning NULL or an arbitrary but appropriately aligned
     509             :    non-NULL and this could be implemented as a compile time mode with
     510             :    some small extra overhead if desirable).
     511             : 
     512             :    This cannot fail from the caller's point of view (if handholding is
     513             :    enabled, it will abort the caller with a descriptive error message if
     514             :    used obviously in error).
     515             : 
     516             :    This is freaky fast (O(5) fast asm operations under the hood). */
     517             : 
     518             : FD_FN_UNUSED static void * /* Work around -Winline */
     519             : fd_scratch_alloc( ulong align,
     520    11026509 :                   ulong sz ) {
     521    11026509 :   ulong smem = (ulong)fd_scratch_prepare( align );
     522    11026509 :   ulong end  = smem + sz;
     523             : 
     524             : # if FD_SCRATCH_USE_HANDHOLDING
     525          48 :   if( FD_UNLIKELY( (end < smem) | (end > fd_scratch_private_stop) ) ) FD_LOG_ERR(( "sz (%lu) overflow", sz ));
     526          48 : # endif
     527             : 
     528          48 :   fd_scratch_publish( (void *)end );
     529          48 :   return (void *)smem;
     530    11026509 : }
     531             : 
     532             : /* fd_scratch_trim trims the size of the most recent scratch allocation
     533             :    in the current scratch frame (technically it can be used to trim the
     534             :    size of the entire current scratch frame but doing more than the most
     535             :    recent scratch allocation is strongly discouraged).  Assumes there is
     536             :    a current scratch frame and the caller is not in a prepare.  end
     537             :    points at the first byte to free in the most recent scratch
     538             :    allocation (or the first byte after the most recent scratch
     539             :    allocation).  This allows idioms like:
     540             : 
     541             :      uchar * p = (uchar *)fd_scratch_alloc( align, max_sz );
     542             : 
     543             :      ... populate sz bytes of p where sz is in [0,max_sz]
     544             :      p += sz;
     545             : 
     546             :      fd_scratch_trim( p );
     547             : 
     548             :      ... now the thread's scratch is as though original call was
     549             :      ... p = fd_scratch_alloc( align, sz );
     550             : 
     551             :    This cannot fail from the caller's point of view (if handholding is
     552             :    enabled, this will abort the caller with a descriptive error message
     553             :    if used obviously in error).
     554             : 
     555             :    Note that an allocation be repeatedly trimmed.
     556             : 
     557             :    Note also that trim can nest.  E.g. a thread can call a function that
     558             :    uses scratch with its own properly matched scratch pushes and pops.
     559             :    On function return, trim will still work on the most recent scratch
     560             :    alloc in that frame by the caller.
     561             : 
     562             :    This is freaky fast (O(1) fast asm operations under the hood). */
     563             : 
     564             : static inline void
     565      757610 : fd_scratch_trim( void * _end ) {
     566      757610 :   ulong end = (ulong)_end;
     567             : 
     568             : # if FD_SCRATCH_USE_HANDHOLDING
     569             :   if( FD_UNLIKELY( !fd_scratch_private_frame_cnt                                      ) ) FD_LOG_ERR(( "unmatched push" ));
     570             :   if( FD_UNLIKELY( end < fd_scratch_private_frame[ fd_scratch_private_frame_cnt-1UL ] ) ) FD_LOG_ERR(( "trim underflow" ));
     571             :   if( FD_UNLIKELY( end > fd_scratch_private_free                                      ) ) FD_LOG_ERR(( "trim overflow" ));
     572             :   fd_scratch_in_prepare = 0;
     573             : # endif
     574             : 
     575             : # if FD_HAS_DEEPASAN
     576             :   /* The region to poison should be from _end to the end of the scratch's region.
     577             :      The same alignment considerations need to be taken into account. */
     578             :   ulong aligned_end  = fd_ulong_align_up( end, FD_ASAN_ALIGN );
     579             :   ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
     580             :   fd_asan_poison( (void*)aligned_end, aligned_stop - aligned_end );
     581             : # endif
     582             : # if FD_HAS_MSAN
     583             :   ulong aligned_end  = fd_ulong_align_up( end, FD_MSAN_ALIGN );
     584             :   ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
     585             :   fd_msan_poison( (void*)aligned_end, aligned_stop - aligned_end );
     586             : # endif
     587             : 
     588      757610 :   fd_scratch_private_free = end;
     589      757610 : }
     590             : 
     591             : /* fd_scratch_*_is_safe returns false (0) if the operation is obviously
     592             :    unsafe to do at the time of the call or true otherwise.
     593             :    Specifically:
     594             : 
     595             :    fd_scratch_attach_is_safe() returns 1 if the calling thread is not
     596             :    already attached to scratch.
     597             : 
     598             :    fd_scratch_detach_is_safe() returns 1 if the calling thread is
     599             :    already attached to scratch.
     600             : 
     601             :    fd_scratch_reset_is_safe() returns 1 if the calling thread is already
     602             :    attached to scratch.
     603             : 
     604             :    fd_scratch_push_is_safe() returns 1 if there is at least one frame
     605             :    available and 0 otherwise.
     606             : 
     607             :    fd_scratch_pop_is_safe() returns 1 if there is at least one frame
     608             :    in use and 0 otherwise.
     609             : 
     610             :    fd_scratch_prepare_is_safe( align ) returns 1 if there is a current
     611             :    frame for the allocation and enough scratch pad memory to start
     612             :    preparing an allocation with alignment align.
     613             : 
     614             :    fd_scratch_publish_is_safe( end ) returns 1 if end is a valid
     615             :    location to complete an allocation in preparation.  If handholding is
     616             :    enabled, will additionally check that there is a prepare already in
     617             :    progress.
     618             : 
     619             :    fd_scratch_cancel_is_safe() returns 1.
     620             : 
     621             :    fd_scratch_alloc_is_safe( align, sz ) returns 1 if there is a current
     622             :    frame for the allocation and enough scratch pad memory for an
     623             :    allocation with alignment align and size sz.
     624             : 
     625             :    fd_scratch_trim_is_safe( end ) returns 1 if there is a current frame
     626             :    and that current frame can be trimmed to end safely.
     627             : 
     628             :    These are safe to call at any time and also freak fast handful of
     629             :    assembly operations. */
     630             : 
/* Attach is safe only when not already attached; detach / reset are
   safe only when attached (frame_max is non-zero iff attached). */
FD_FN_PURE static inline int fd_scratch_attach_is_safe( void ) { return  !fd_scratch_private_frame_max; }
FD_FN_PURE static inline int fd_scratch_detach_is_safe( void ) { return !!fd_scratch_private_frame_max; }
FD_FN_PURE static inline int fd_scratch_reset_is_safe ( void ) { return !!fd_scratch_private_frame_max; }
/* Push needs a free frame slot; pop needs at least one frame in use. */
FD_FN_PURE static inline int fd_scratch_push_is_safe  ( void ) { return fd_scratch_private_frame_cnt<fd_scratch_private_frame_max; }
FD_FN_PURE static inline int fd_scratch_pop_is_safe   ( void ) { return !!fd_scratch_private_frame_cnt; }
     636             : 
     637             : FD_FN_PURE static inline int
     638           0 : fd_scratch_prepare_is_safe( ulong align ) {
     639           0 :   if( FD_UNLIKELY( !fd_scratch_private_frame_cnt               ) ) return 0; /* No current frame */
     640           0 :   if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) return 0; /* Bad alignment, compile time typically */
     641           0 :   ulong true_align = fd_scratch_private_true_align( align ); /* compile time typically */
     642           0 :   ulong smem       = fd_ulong_align_up( fd_scratch_private_free, true_align );
     643           0 :   if( FD_UNLIKELY( smem < fd_scratch_private_free              ) ) return 0; /* alignment overflow */
     644           0 :   if( FD_UNLIKELY( smem > fd_scratch_private_stop              ) ) return 0; /* insufficient scratch */
     645           0 :   return 1;
     646           0 : }
     647             : 
     648             : FD_FN_PURE static inline int
     649           0 : fd_scratch_publish_is_safe( void * _end ) {
     650           0 :   ulong end = (ulong)_end;
     651           0 : # if FD_SCRATCH_USE_HANDHOLDING
     652           0 :   if( FD_UNLIKELY( !fd_scratch_in_prepare        ) ) return 0; /* Not in prepare */
     653           0 : # endif
     654           0 :   if( FD_UNLIKELY( end < fd_scratch_private_free ) ) return 0; /* Backward */
     655           0 :   if( FD_UNLIKELY( end > fd_scratch_private_stop ) ) return 0; /* Out of bounds */
     656           0 :   return 1;
     657           0 : }
     658             : 
/* Cancelling is always safe: at worst it clears the in-prepare flag. */
FD_FN_CONST static inline int
fd_scratch_cancel_is_safe( void ) {
  return 1;
}
     663             : 
     664             : FD_FN_PURE static inline int
     665             : fd_scratch_alloc_is_safe( ulong align,
     666     2913356 :                           ulong sz ) {
     667     2913356 :   if( FD_UNLIKELY( !fd_scratch_private_frame_cnt               ) ) return 0; /* No current frame */
     668     2596109 :   if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) return 0; /* Bad align, compile time typically */
     669     2596109 :   ulong true_align = fd_scratch_private_true_align( align ); /* compile time typically */
     670     2596109 :   ulong smem       = fd_ulong_align_up( fd_scratch_private_free, true_align );
     671     2596109 :   if( FD_UNLIKELY( smem < fd_scratch_private_free              ) ) return 0; /* align overflow */
     672     2596109 :   ulong free       = smem + sz;
     673     2596109 :   if( FD_UNLIKELY( free < smem                                 ) ) return 0; /* sz overflow */
     674     2596109 :   if( FD_UNLIKELY( free > fd_scratch_private_stop              ) ) return 0; /* too little space */
     675      757610 :   return 1;
     676     2596109 : }
     677             : 
     678             : FD_FN_PURE static inline int
     679           0 : fd_scratch_trim_is_safe( void * _end ) {
     680           0 :   ulong end = (ulong)_end;
     681           0 :   if( FD_UNLIKELY( !fd_scratch_private_frame_cnt                                      ) ) return 0; /* No current frame */
     682           0 :   if( FD_UNLIKELY( end < fd_scratch_private_frame[ fd_scratch_private_frame_cnt-1UL ] ) ) return 0; /* Trim underflow */
     683           0 :   if( FD_UNLIKELY( end > fd_scratch_private_free                                      ) ) return 0; /* Trim overflow */
     684           0 :   return 1;
     685           0 : }
     686             : 
     687             : /* fd_scratch_vtable is the virtual function table implementing
     688             :    fd_valloc for fd_scratch. */
     689             : 
     690             : extern const fd_valloc_vtable_t fd_scratch_vtable;
     691             : 
     692             : /* fd_scratch_virtual returns an abstract handle to the fd_scratch join.
     693             :    Valid for lifetime of scratch frame.  fd_valloc_t must be dropped
     694             :    before scratch frame changes or scratch detaches. */
     695             : 
     696             : FD_FN_CONST static inline fd_valloc_t
     697           0 : fd_scratch_virtual( void ) {
     698           0 :   fd_valloc_t valloc = { NULL, &fd_scratch_vtable };
     699           0 :   return valloc;
     700           0 : }
     701             : 
     702             : /* FD_SCRATCH_SCOPE_{BEGIN,END} create a `do { ... } while(0);` scope in
     703             :    which a temporary scratch frame is available.  Nested scopes are
     704             :    permitted.  This scratch frame is automatically destroyed when
     705             :    exiting the scope normally (e.g. by 'break', 'return', or reaching
     706             :    the end).  Uses a dummy variable with a cleanup attribute under the
     707             :    hood.  U.B. if scope is left abnormally (e.g. longjmp(), exception,
     708             :    abort(), etc.).  Use as follows:
     709             : 
     710             :    FD_SCRATCH_SCOPE_BEGIN {
     711             :      ...
     712             :      fd_scratch_alloc( ... );
     713             :      ...
     714             :    }
     715             :    FD_SCRATCH_SCOPE_END; */
     716             : 
/* Cleanup hook for FD_SCRATCH_SCOPE_BEGIN's guard variable: runs via
   __attribute__((cleanup)) when the guard leaves scope and pops the
   scope's scratch frame.  _unused is the (ignored) address of the
   guard variable, as required by the cleanup attribute's signature. */
FD_FN_UNUSED static inline void
fd_scratch_scoped_pop_private( void * _unused ) {
  (void)_unused;
  fd_scratch_pop();
}
     722             : 
/* Pushes a scratch frame, then declares a dummy guard whose cleanup
   attribute pops the frame when the scope exits (normally).
   NOTE(review): the ## paste happens before __LINE__ expands, so the
   guard is literally named __fd_scratch_guard___LINE__ in every scope;
   nested scopes merely shadow the outer guard, which still works --
   confirm this naming is intended. */
#define FD_SCRATCH_SCOPE_BEGIN do {                         \
  fd_scratch_push();                                        \
  int __fd_scratch_guard_ ## __LINE__                       \
    __attribute__((cleanup(fd_scratch_scoped_pop_private))) \
    __attribute__((unused)) = 0;                            \
  do

/* Closes the inner do and the outer do { ... } while(0) opened by BEGIN. */
#define FD_SCRATCH_SCOPE_END while(0); } while(0)
     731             : 
     732             : /* fd_alloca is variant of alloca that works like aligned_alloc.  That
     733             :    is, it returns an allocation of sz bytes with an alignment of at
     734             :    least align.  Like alloca, this allocation will be in the stack frame
     735             :    of the calling function with a lifetime of until the calling function
     736             :    returns.  Stack overflow handling is likewise identical to alloca
     737             :    (stack overflows will overlap the top stack guard, typically
     738             :    triggering a seg fault when the overflow region is touched that will
     739             :    be caught and handled by the logger to terminate the calling thread
     740             :    group).  As such, like alloca, these really should only be used for
     741             :    smallish (<< few KiB) quick allocations in bounded recursion depth
     742             :    circumstances.
     743             : 
     744             :    Like fd_scratch_alloc, align must be an 0 or a non-negative integer
     745             :    power of 2.  0 will be treated as align_default.  align smaller than
     746             :    align_min will be bumped up to align_min.
     747             : 
     748             :    The caller promises request will not overflow the stack.  This has to
     749             :    be implemented as a macro for linguistic reasons and align should be
     750             :    safe against multiple evaluation and, due to compiler limitations,
     751             :    must be a compile time constant.  Returns non-NULL on success and
     752             :    NULL on failure (in most situations, can never fail from the caller's
     753             :    POV).  sz==0 is okay (and will return non-NULL). */
     754             : 
     755             : #if FD_HAS_ALLOCA
     756             : 
     757             : /* Work around compiler limitations */
     758          33 : #define FD_SCRATCH_PRIVATE_TRUE_ALIGN( align ) ((align) ? (align) : FD_SCRATCH_ALIGN_DEFAULT)
     759             : 
     760          18 : #define fd_alloca(align,sz) __builtin_alloca_with_align( fd_ulong_max( (sz), 1UL ), \
     761          18 :                                                          8UL*FD_SCRATCH_PRIVATE_TRUE_ALIGN( (align) ) /*bits*/ )
     762             : 
     763             : /* fd_alloca_check does fd_alloca but it will FD_LOG_CRIT with a
     764             :    detailed message if the request would cause a stack overflow or leave
     765             :    so little available free stack that subsequent normal thread
     766             :    operations would be at risk.
     767             : 
     768             :    Note that returning NULL on failure is not an option as this would no
     769             :    longer be a drop-in instrumented replacement for fd_alloca (this
     770             :    would also require even more linguistic hacks to keep the fd_alloca
     771             :    at the appropriate scope).  Likewise, testing the allocated region is
     772             :    within the stack post allocation is not an option as the FD_LOG_CRIT
     773             :    invocation would then try to use stack with the already overflowed
     774             :    allocation in it (there is no easy portable way to guarantee an
     775             :    alloca has been freed short of returning from the function in which
     776             :    the alloca was performed).  Using FD_LOG_ERR instead of FD_LOG_CRIT
     777             :    is a potentially viable alternative error handling behavior though.
     778             : 
     779             :    This has to be implemented as a macro for linguistic reasons.  It is
     780             :    recommended this only be used for development / debugging / testing
     781             :    purposes (e.g. if you are doing alloca in production that are large
     782             :    enough you are worried about stack overflow, you probably should be
     783             :    using fd_scratch, fd_alloc or fd_wksp depending on performance and
     784             :    persistence needs or, better still, architecting to not need any
     785             :    temporary memory allocations at all).  If the caller's stack
     786             :    diagnostics could not be successfully initialized (this is logged),
     787             :    this will always FD_LOG_CRIT. */
     788             : 
     789             : #if !FD_HAS_ASAN
     790             : 
     791             : extern FD_TL ulong fd_alloca_check_private_sz;
     792             : 
/* Evaluates sz exactly once into thread-local fd_alloca_check_private_sz
   (so the trailing fd_alloca sees the same value), computes the worst
   case footprint (sz plus maximal alignment padding), and FD_LOG_CRITs
   if that footprint wrapped or would consume more than 31/32 of the
   estimated free stack; otherwise performs the fd_alloca.  Implemented
   as a comma expression so the alloca happens at the caller's scope. */
#define fd_alloca_check( align, sz )                                                                             \
   ( fd_alloca_check_private_sz = (sz),                                                                          \
     (__extension__({                                                                                            \
       ulong _fd_alloca_check_private_pad_max   = FD_SCRATCH_PRIVATE_TRUE_ALIGN( (align) ) - 1UL;                \
       ulong _fd_alloca_check_private_footprint = fd_alloca_check_private_sz + _fd_alloca_check_private_pad_max; \
       if( FD_UNLIKELY( (_fd_alloca_check_private_footprint < _fd_alloca_check_private_pad_max      ) |          \
                        (_fd_alloca_check_private_footprint > (31UL*(fd_tile_stack_est_free() >> 5))) ) )        \
         FD_LOG_CRIT(( "fd_alloca_check( " #align ", " #sz " ) stack overflow" ));                               \
     })),                                                                                                        \
     fd_alloca( (align), fd_alloca_check_private_sz ) )
     803             : 
     804             : #else /* FD_HAS_ASAN */
     805             : 
     806             : /* AddressSanitizer provides its own alloca safety instrumentation
     807             :    which are more powerful than the above fd_alloca_check heuristics. */
     808             : 
     809             : #define fd_alloca_check fd_alloca
     810             : 
     811             : #endif /* FD_HAS_ASAN */
     812             : #endif /* FD_HAS_ALLOCA */
     813             : 
     814             : FD_PROTOTYPES_END
     815             : 
     816             : #endif /* HEADER_fd_src_util_scratch_fd_scratch_h */

Generated by: LCOV version 1.14