#ifndef HEADER_fd_src_util_scratch_fd_scratch_h
#define HEADER_fd_src_util_scratch_fd_scratch_h

/* APIs for high performance scratch pad memory allocation. There
   are two allocators provided. One is fd_alloca, which is an alignment
   aware equivalent of alloca. It is meant for use anywhere alloca
   would normally be used. This is only available if the build target
   has the FD_HAS_ALLOCA capability. The second is fd_scratch_alloc.
   It is meant for use in situations that have very complex and large
   temporary memory usage. */

#include "../sanitize/fd_sanitize.h"
#include "../tile/fd_tile.h"
#include "../valloc/fd_valloc.h"

/* FD_SCRATCH_USE_HANDHOLDING: Define this to non-zero at compile time
   to turn on additional run-time checks. */

#ifndef FD_SCRATCH_USE_HANDHOLDING
#define FD_SCRATCH_USE_HANDHOLDING 0
#endif

/* FD_SCRATCH_ALIGN_DEFAULT is the default alignment to use for
   allocations.

   Default should be at least 16 for consistent cross platform behavior
   that is language conformant across a wide range of targets (i.e. the
   largest primitive type across all possible build ... practically
   sizeof(int128)). This also naturally covers SSE natural alignment on
   x86. 8 could be used if features like int128 and so forth are
   excluded while still being linguistically conformant (sizeof(ulong)
   here is the limit). Likewise, 32, 64, 128 could be used to guarantee
   all allocations will have natural AVX/AVX2, natural AVX-512 /
   cache-line, adjacent-cache-line-prefetch false sharing avoidance /
   natural GPU alignment properties.

   128 for default was picked as double x86 cache line for ACLPF false
   sharing avoidance and for consistency with GPU warp sizes ... i.e.
   the default allocation behaviors are naturally interthread
   communication false sharing resistant and GPU friendly. This also
   naturally covers cases like SSE, AVX, AVX2 and AVX-512. */

#define FD_SCRATCH_ALIGN_DEFAULT (128UL) /* integer power-of-2 >=16 */

/* FD_SCRATCH_{SMEM,FMEM}_ALIGN give the alignment requirements for
   the memory regions used as a scratch pad memory. Practically, there
   are not many restrictions on the SMEM alignment other than it be a
   reasonable integer power of two. 128 was picked to harmonize with
   FD_SCRATCH_ALIGN_DEFAULT (which does have more technical motivations
   behind its choice) but this is not strictly required.
   FD_SCRATCH_FMEM_ALIGN is required to be sizeof(ulong). */

#define FD_SCRATCH_SMEM_ALIGN (128UL) /* integer power-of-2, harmonized with ALIGN_DEFAULT */
#define FD_SCRATCH_FMEM_ALIGN (8UL)   /* ==sizeof(ulong) but avoids bugs with some compilers */

FD_PROTOTYPES_BEGIN

/* Private APIs *******************************************************/

#if FD_SCRATCH_USE_HANDHOLDING
extern FD_TL int fd_scratch_in_prepare;
#endif

extern FD_TL ulong fd_scratch_private_start;
extern FD_TL ulong fd_scratch_private_free;
extern FD_TL ulong fd_scratch_private_stop;

extern FD_TL ulong * fd_scratch_private_frame;
extern FD_TL ulong   fd_scratch_private_frame_cnt;
extern FD_TL ulong   fd_scratch_private_frame_max;

FD_FN_CONST static inline int
fd_scratch_private_align_is_valid( ulong align ) {
  return !(align & (align-1UL)); /* returns true if power of 2 or zero, compile time typically */
}

FD_FN_CONST static inline ulong
fd_scratch_private_true_align( ulong align ) {
  return fd_ulong_if( !align, FD_SCRATCH_ALIGN_DEFAULT, align ); /* compile time typically */
}

/* Public APIs ********************************************************/

/* Constructor APIs */

/* fd_scratch_smem_{align,footprint} return the alignment and footprint
   of a memory region suitable for use as a scratch pad memory that can
   hold up to smax bytes. There are very few restrictions on the nature
   of this memory. It could even be just a flat address space that is
   not backed by actual physical memory as far as scratch is
   concerned. In typical use cases though, the scratch pad memory
   should point to a region of huge or gigantic page backed memory on
   the caller's numa node.

   A shared memory region is fine for smem. This could be used for
   example to allow other threads / processes to access a scratch
   allocation from this thread for the lifetime of a scratch allocation.

   Even more generally, using a shared memory region for both smem and
   fmem makes it theoretically possible to have a scratch pad memory
   that is shared across multiple threads / processes. The API is not
   well designed for such though (the main reason to use fmem in shared
   memory would be convenience and/or adding hot swapping
   functionality). In the common scratch scenario, every thread would
   attach to their local join of the shared smem and shared fmem. But
   since the operations below are not designed to be thread safe, the
   threads would have to protect against concurrent use of push and pop
   (and attach would probably need to be tweaked to make it easier to
   attach to an already in use scratch pad).

   Compile time allocation is possible via the FD_SCRATCH_SMEM_ALIGN
   define. E.g.:

     uchar my_smem[ MY_SMAX ] __attribute__((aligned(FD_SCRATCH_SMEM_ALIGN)));

   will be valid to use as a scratch smem with space for up to MY_SMAX
   bytes. */

FD_FN_CONST static inline ulong fd_scratch_smem_align( void ) { return FD_SCRATCH_SMEM_ALIGN; }

FD_FN_CONST static inline ulong
fd_scratch_smem_footprint( ulong smax ) {
  return fd_ulong_align_up( smax, FD_SCRATCH_SMEM_ALIGN );
}

/* fd_scratch_fmem_{align,footprint} return the alignment and footprint
   of a memory region suitable for holding the scratch pad memory
   metadata (typically very small). The scratch pad memory will be
   capable of holding up to depth scratch frames.

   Compile time allocation is possible via the FD_SCRATCH_FMEM_ALIGN
   define. E.g.

     ulong my_fmem[ MY_DEPTH ] __attribute__((aligned(FD_SCRATCH_FMEM_ALIGN)));

   or, even simpler:

     ulong my_fmem[ MY_DEPTH ];

   will be valid to use as a scratch fmem with space for up to depth
   frames. The attribute variant is not strictly necessary; it is just
   for consistency with the smem above (where it is required). */

FD_FN_CONST static inline ulong fd_scratch_fmem_align    ( void )        { return sizeof(ulong); }
FD_FN_CONST static inline ulong fd_scratch_fmem_footprint( ulong depth ) { return sizeof(ulong)*depth; }

/* fd_scratch_attach attaches the calling thread to memory regions
   sufficient to hold up to smax (positive) bytes and with up to depth
   (positive) frames. smem/fmem should be non-NULL and have the
   required alignment and footprint specified for smax/depth from the
   above. The caller has a read/write interest in these regions while
   attached (and thus the local lifetime of these regions must cover
   the lifetime of the attachment). Only one scratch pad memory may be
   attached to a caller at a time. This cannot fail from the caller's
   point of view (if handholding is enabled, it will abort the caller
   with a descriptive error message if used obviously in error). */

static inline void
fd_scratch_attach( void * smem,
                   void * fmem,
                   ulong  smax,
                   ulong  depth ) {

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "already attached" ));
  if( FD_UNLIKELY( !smem                        ) ) FD_LOG_ERR(( "bad smem"  ));
  if( FD_UNLIKELY( !fmem                        ) ) FD_LOG_ERR(( "bad fmem"  ));
  if( FD_UNLIKELY( !smax                        ) ) FD_LOG_ERR(( "bad smax"  ));
  if( FD_UNLIKELY( !depth                       ) ) FD_LOG_ERR(( "bad depth" ));
  fd_scratch_in_prepare = 0;
# endif

  fd_scratch_private_start = (ulong)smem;
  fd_scratch_private_free  = fd_scratch_private_start;
  fd_scratch_private_stop  = fd_scratch_private_start + smax;

  fd_scratch_private_frame     = (ulong *)fmem;
  fd_scratch_private_frame_cnt = 0UL;
  fd_scratch_private_frame_max = depth;

# if FD_HAS_DEEPASAN
  /* Poison the entire smem region. Underpoison the boundaries to
     respect alignment requirements. */
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
  ulong aligned_end   = fd_ulong_align_dn( fd_scratch_private_stop,  FD_ASAN_ALIGN );
  fd_asan_poison( (void*)aligned_start, aligned_end - aligned_start );
# endif
# if FD_HAS_MSAN
  /* Mark the entire smem region as uninitialized. */
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_MSAN_ALIGN );
  ulong aligned_end   = fd_ulong_align_dn( fd_scratch_private_stop,  FD_MSAN_ALIGN );
  fd_msan_poison( (void*)aligned_start, aligned_end - aligned_start );
# endif
}

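/* For example, a typical single threaded attach / detach lifecycle
   might look like the below (a minimal sketch; MY_SMAX and MY_DEPTH
   are hypothetical application-defined compile time constants):

     static uchar my_smem[ MY_SMAX  ] __attribute__((aligned(FD_SCRATCH_SMEM_ALIGN)));
     static ulong my_fmem[ MY_DEPTH ];

     fd_scratch_attach( my_smem, my_fmem, MY_SMAX, MY_DEPTH );
     ... arbitrary scratch push / alloc / pop usage here ...
     fd_scratch_detach( NULL );
*/
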
/* fd_scratch_detach detaches the calling thread from its current
   attachment. Returns smem used on attach and, if opt_fmem is
   non-NULL, opt_fmem[0] will contain the fmem used on attach on
   return.

   This relinquishes the calling thread's read/write interest on these
   memory regions. All the caller's scratch frames are popped, any
   prepare in progress is canceled and all the caller's scratch
   allocations are freed implicitly by this.

   This cannot fail from the caller's point of view (if handholding is
   enabled, it will abort the caller with a descriptive error message
   if used obviously in error). */

static inline void *
fd_scratch_detach( void ** _opt_fmem ) {

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
  fd_scratch_in_prepare = 0;
# endif

# if FD_HAS_DEEPASAN
  /* Unpoison the entire scratch space. There should now be an
     underlying allocation which has not been poisoned. */
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop,  FD_ASAN_ALIGN );
  fd_asan_unpoison( (void*)aligned_start, aligned_stop - aligned_start );
# endif

  void * smem = (void *)fd_scratch_private_start;
  void * fmem = (void *)fd_scratch_private_frame;

  fd_scratch_private_start = 0UL;
  fd_scratch_private_free  = 0UL;
  fd_scratch_private_stop  = 0UL;

  fd_scratch_private_frame     = NULL;
  fd_scratch_private_frame_cnt = 0UL;
  fd_scratch_private_frame_max = 0UL;

  if( _opt_fmem ) _opt_fmem[0] = fmem;
  return smem;
}

/* User APIs */

/* fd_scratch_{used,free} returns the number of bytes used/free in the
   caller's scratch. Returns 0 if not attached. Because of alignment
   overheads, an allocation is guaranteed to succeed if free>=sz+align-1
   where align is the actual alignment required for the allocation (e.g.
   align==0 -> default, align<min -> min). It is guaranteed to fail if
   free<sz. It might succeed or fail in between depending on the
   alignments of previous allocations. These are freaky fast (O(3)
   fast asm operations under the hood). */

static inline ulong fd_scratch_used( void ) { return fd_scratch_private_free - fd_scratch_private_start; }
static inline ulong fd_scratch_free( void ) { return fd_scratch_private_stop - fd_scratch_private_free;  }

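/* E.g. a conservative worst-case capacity check before allocating
   (a sketch; align here is a non-zero power of 2 and sz is a caller
   supplied size):

     if( FD_LIKELY( fd_scratch_free() >= sz+align-1UL ) ) {
       void * p = fd_scratch_alloc( align, sz ); // guaranteed to succeed
       ...
     }
*/
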
/* fd_scratch_frame_{used,free} returns the number of scratch frames
   used/free in the caller's scratch. Returns 0 if not attached. push
   is guaranteed to succeed if free is non-zero and guaranteed to fail
   otherwise. pop is guaranteed to succeed if used is non-zero and
   guaranteed to fail otherwise. These are freaky fast (O(1-3) fast
   asm operations under the hood). */

static inline ulong fd_scratch_frame_used( void ) { return fd_scratch_private_frame_cnt; }
static inline ulong fd_scratch_frame_free( void ) { return fd_scratch_private_frame_max - fd_scratch_private_frame_cnt; }

/* fd_scratch_reset frees all allocations (if any) and pops all scratch
   frames (if any) such that the caller's scratch will be in the same
   state it was immediately after attach. The caller must be attached
   to a scratch memory to use. This cannot fail from the caller's
   point of view (if handholding is enabled, it will abort the caller
   with a descriptive error message if used obviously in error). This
   is freaky fast (O(3) fast asm operations under the hood). */

static inline void
fd_scratch_reset( void ) {
# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
  fd_scratch_in_prepare = 0;
# endif
  fd_scratch_private_free      = fd_scratch_private_start;
  fd_scratch_private_frame_cnt = 0UL;

  /* Poison entire scratch space again. */
# if FD_HAS_DEEPASAN
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop,  FD_ASAN_ALIGN );
  fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
# endif
# if FD_HAS_MSAN
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_MSAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop,  FD_MSAN_ALIGN );
  fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
# endif
}

/* fd_scratch_push creates a new scratch frame and makes it the current
   frame. Assumes caller is attached to a scratch with space for a new
   frame. This cannot fail from the caller's point of view (if
   handholding is enabled, it will abort the caller with a descriptive
   error message if used obviously in error). This is freaky fast (O(5)
   fast asm operations under the hood). */

FD_FN_UNUSED static void /* Work around -Winline */
fd_scratch_push( void ) {
# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) {
    FD_LOG_ERR(( "not attached" ));
  }
  if( FD_UNLIKELY( fd_scratch_private_frame_cnt>=fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "too many frames" ));
  fd_scratch_in_prepare = 0;
# endif
  fd_scratch_private_frame[ fd_scratch_private_frame_cnt++ ] = fd_scratch_private_free;

  /* Poison to end of scratch region to account for the case of an
     in-prep allocation getting implicitly cancelled. */
# if FD_HAS_DEEPASAN
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_ASAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
  fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
# endif
# if FD_HAS_MSAN
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_MSAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
  fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
# endif
}

/* fd_scratch_pop frees all allocations in the current scratch frame,
   destroys the current scratch frame and makes the previous frame (if
   there is one) the current frame (and leaves the caller without a
   current frame if there is not one). Assumes the caller is attached
   to a scratch memory with at least one frame in use. This cannot
   fail from the caller's point of view (if handholding is enabled, it
   will abort the caller with a descriptive error message if used
   obviously in error). This is freaky fast (O(5) fast asm operations
   under the hood). */

FD_FN_UNUSED static void /* Work around -Winline */
fd_scratch_pop( void ) {
# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
  if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched pop" ));
  fd_scratch_in_prepare = 0;
# endif
  fd_scratch_private_free = fd_scratch_private_frame[ --fd_scratch_private_frame_cnt ];

# if FD_HAS_DEEPASAN
  /* On a pop() operation, the entire range from fd_scratch_private_free
     to the end of the scratch space can be safely poisoned. The region
     must be aligned to accommodate asan manual poisoning requirements. */
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_ASAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
  fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
# endif
# if FD_HAS_MSAN
  ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_MSAN_ALIGN );
  ulong aligned_stop  = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
  fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
# endif
}

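/* E.g. a typical frame lifecycle (a sketch):

     fd_scratch_push();                            // new frame
     void * tmp = fd_scratch_alloc( 0UL, 1024UL ); // default aligned 1024 byte region in the frame
     ... use tmp here ...
     fd_scratch_pop();                             // frees tmp (and any other allocs in the frame)
*/
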
/* fd_scratch_prepare starts an allocation of unknown size and known
   alignment align (0 means use default alignment) in the caller's
   current scratch frame. Returns a pointer in the caller's address
   space with alignment align to the first byte of a region with
   fd_scratch_free() (as observed after this function returns) bytes
   available. The caller is free to clobber any bytes in this region.

   fd_scratch_publish finishes an in-progress allocation. end points at
   the first byte after the final allocation. Assumes there is a
   matching prepare. A published allocation can be subsequently
   trimmed.

   fd_scratch_cancel cancels an in-progress allocation. This is a no-op
   if there is no matching prepare. If the prepare had alignment other
   than 1, it is possible that some alignment padding needed for the
   allocation will still be used in the caller's current scratch frame.
   If this is not acceptable, the prepare should use an alignment of 1
   and manually align the return.

   This allows idioms like:

     uchar * p = (uchar *)fd_scratch_prepare( align );

     if( FD_UNLIKELY( fd_scratch_free() < app_max_sz ) ) {

       fd_scratch_cancel();

       ... handle too little scratch space to handle application
       ... worst case needs here

     } else {

       ... populate sz bytes to p where sz is in [0,app_max_sz]
       p += sz;

       fd_scratch_publish( p );

       ... at this point, scratch is as though
       ... fd_scratch_alloc( align, sz ) was called above

     }

   Ideally every prepare should be matched with a publish or a cancel,
   only one prepare can be in-progress at a time on a thread and
   prepares cannot be nested. As such, virtually all other scratch
   operations will implicitly cancel any in-progress prepare, including
   attach / detach / push / pop / prepare / alloc / trim. */

FD_FN_UNUSED static void * /* Work around -Winline */
fd_scratch_prepare( ulong align ) {

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched push" ));
  if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) FD_LOG_ERR(( "bad align (%lu)", align ));
# endif

# if FD_HAS_DEEPASAN
  /* Need 8 byte alignment. */
  align = fd_ulong_align_up( align, FD_ASAN_ALIGN );
# endif
  ulong true_align = fd_scratch_private_true_align( align );
  ulong smem       = fd_ulong_align_up( fd_scratch_private_free, true_align );

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) FD_LOG_ERR(( "prepare align (%lu) overflow", true_align ));
  if( FD_UNLIKELY( smem > fd_scratch_private_stop ) ) FD_LOG_ERR(( "prepare align (%lu) needs %lu additional scratch",
                                                                   align, smem - fd_scratch_private_stop ));
  fd_scratch_in_prepare = 1;
# endif

# if FD_HAS_DEEPASAN
  /* At this point the user is able to clobber any bytes in the region.
     smem is always going to be at least 8 byte aligned. */
  ulong aligned_sz = fd_ulong_align_up( fd_scratch_private_stop - smem, FD_ASAN_ALIGN );
  fd_asan_unpoison( (void*)smem, aligned_sz );
# endif

  fd_scratch_private_free = smem;
  return (void *)smem;
}

static inline void
fd_scratch_publish( void * _end ) {
  ulong end = (ulong)_end;

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_in_prepare        ) ) FD_LOG_ERR(( "unmatched prepare" ));
  if( FD_UNLIKELY( end < fd_scratch_private_free ) ) FD_LOG_ERR(( "publish underflow" ));
  if( FD_UNLIKELY( end > fd_scratch_private_stop ) )
    FD_LOG_ERR(( "publish needs %lu additional scratch", end-fd_scratch_private_stop ));
  fd_scratch_in_prepare = 0;
# endif

  /* Poison everything that is trimmed off. Conservatively poison
     potentially less than the region that is trimmed to respect
     alignment requirements. */
# if FD_HAS_DEEPASAN
  ulong aligned_end  = fd_ulong_align_up( end,                     FD_ASAN_ALIGN );
  ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
  fd_asan_poison( (void*)aligned_end, aligned_stop - aligned_end );
# endif
# if FD_HAS_MSAN
  ulong aligned_end  = fd_ulong_align_up( end,                     FD_MSAN_ALIGN );
  ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
  fd_msan_poison( (void*)aligned_end, aligned_stop - aligned_end );
# endif

  fd_scratch_private_free = end;
}

static inline void
fd_scratch_cancel( void ) {

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_in_prepare ) ) FD_LOG_ERR(( "unmatched prepare" ));
  fd_scratch_in_prepare = 0;
# endif

}

/* fd_scratch_alloc allocates sz bytes with alignment align in the
   caller's current scratch frame. There should be no prepare in
   progress. Note that this has the same function signature as
   aligned_alloc (and not by accident). It does have some less
   restrictive behaviors though.

   align must be 0 or an integer power of 2. 0 will be treated as
   FD_SCRATCH_ALIGN_DEFAULT.

   sz need not be a multiple of align. Further, the underlying
   allocator does not implicitly round up sz to an align multiple (as
   such, scratch can allocate additional items in any tail padding that
   might have been implicitly reserved had it rounded up). That is, if
   you really want to round up allocations to a multiple of align, then
   manually align up sz ... e.g. pass fd_ulong_align_up(sz,align) when
   align is non-zero to this call (this could be implemented as a
   compile time mode with some small extra overhead if desirable).

   sz 0 is fine. This will currently return a properly aligned non-NULL
   pointer (the allocator might do some allocation under the hood to get
   the desired alignment and it is possible this might fail ... there is
   a case for returning NULL or an arbitrary but appropriately aligned
   non-NULL and this could be implemented as a compile time mode with
   some small extra overhead if desirable).

   This cannot fail from the caller's point of view (if handholding is
   enabled, it will abort the caller with a descriptive error message if
   used obviously in error).

   This is freaky fast (O(5) fast asm operations under the hood). */

FD_FN_UNUSED static void * /* Work around -Winline */
fd_scratch_alloc( ulong align,
                  ulong sz ) {
  ulong smem = (ulong)fd_scratch_prepare( align );
  ulong end  = smem + sz;

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( (end < smem) | (end > fd_scratch_private_stop) ) ) FD_LOG_ERR(( "sz (%lu) overflow", sz ));
# endif

  fd_scratch_publish( (void *)end );
  return (void *)smem;
}

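/* E.g. to explicitly reserve tail padding up to an align multiple as
   described above (a sketch; n is a caller supplied element count):

     double * v = (double *)fd_scratch_alloc( 32UL, fd_ulong_align_up( n*sizeof(double), 32UL ) );
*/
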
/* fd_scratch_trim trims the size of the most recent scratch allocation
   in the current scratch frame (technically it can be used to trim the
   size of the entire current scratch frame but doing more than the most
   recent scratch allocation is strongly discouraged). Assumes there is
   a current scratch frame and the caller is not in a prepare. end
   points at the first byte to free in the most recent scratch
   allocation (or the first byte after the most recent scratch
   allocation). This allows idioms like:

     uchar * p = (uchar *)fd_scratch_alloc( align, max_sz );

     ... populate sz bytes of p where sz is in [0,max_sz]
     p += sz;

     fd_scratch_trim( p );

     ... now the thread's scratch is as though original call was
     ... p = fd_scratch_alloc( align, sz );

   This cannot fail from the caller's point of view (if handholding is
   enabled, this will abort the caller with a descriptive error message
   if used obviously in error).

   Note that an allocation can be repeatedly trimmed.

   Note also that trim can nest. E.g. a thread can call a function that
   uses scratch with its own properly matched scratch pushes and pops.
   On function return, trim will still work on the most recent scratch
   alloc in that frame by the caller.

   This is freaky fast (O(1) fast asm operations under the hood). */

static inline void
fd_scratch_trim( void * _end ) {
  ulong end = (ulong)_end;

# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched push" ));
  if( FD_UNLIKELY( end < fd_scratch_private_frame[ fd_scratch_private_frame_cnt-1UL ] ) ) FD_LOG_ERR(( "trim underflow" ));
  if( FD_UNLIKELY( end > fd_scratch_private_free ) ) FD_LOG_ERR(( "trim overflow" ));
  fd_scratch_in_prepare = 0;
# endif

# if FD_HAS_DEEPASAN
  /* The region to poison should be from _end to the end of the
     scratch's region. The same alignment considerations need to be
     taken into account. */
  ulong aligned_end  = fd_ulong_align_up( end,                     FD_ASAN_ALIGN );
  ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
  fd_asan_poison( (void*)aligned_end, aligned_stop - aligned_end );
# endif
# if FD_HAS_MSAN
  ulong aligned_end  = fd_ulong_align_up( end,                     FD_MSAN_ALIGN );
  ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
  fd_msan_poison( (void*)aligned_end, aligned_stop - aligned_end );
# endif

  fd_scratch_private_free = end;
}

/* fd_scratch_*_is_safe returns false (0) if the operation is obviously
   unsafe to do at the time of the call or true otherwise.
   Specifically:

   fd_scratch_attach_is_safe() returns 1 if the calling thread is not
   already attached to scratch.

   fd_scratch_detach_is_safe() returns 1 if the calling thread is
   already attached to scratch.

   fd_scratch_reset_is_safe() returns 1 if the calling thread is already
   attached to scratch.

   fd_scratch_push_is_safe() returns 1 if there is at least one frame
   available and 0 otherwise.

   fd_scratch_pop_is_safe() returns 1 if there is at least one frame
   in use and 0 otherwise.

   fd_scratch_prepare_is_safe( align ) returns 1 if there is a current
   frame for the allocation and enough scratch pad memory to start
   preparing an allocation with alignment align.

   fd_scratch_publish_is_safe( end ) returns 1 if end is a valid
   location to complete an allocation in preparation. If handholding is
   enabled, will additionally check that there is a prepare already in
   progress.

   fd_scratch_cancel_is_safe() returns 1.

   fd_scratch_alloc_is_safe( align, sz ) returns 1 if there is a current
   frame for the allocation and enough scratch pad memory for an
   allocation with alignment align and size sz.

   fd_scratch_trim_is_safe( end ) returns 1 if there is a current frame
   and that current frame can be trimmed to end safely.

   These are safe to call at any time and are also freaky fast (a
   handful of assembly operations under the hood). */

FD_FN_PURE static inline int fd_scratch_attach_is_safe( void ) { return !fd_scratch_private_frame_max;  }
FD_FN_PURE static inline int fd_scratch_detach_is_safe( void ) { return !!fd_scratch_private_frame_max; }
FD_FN_PURE static inline int fd_scratch_reset_is_safe ( void ) { return !!fd_scratch_private_frame_max; }
FD_FN_PURE static inline int fd_scratch_push_is_safe  ( void ) { return fd_scratch_private_frame_cnt<fd_scratch_private_frame_max; }
FD_FN_PURE static inline int fd_scratch_pop_is_safe   ( void ) { return !!fd_scratch_private_frame_cnt; }

FD_FN_PURE static inline int
fd_scratch_prepare_is_safe( ulong align ) {
  if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) return 0; /* No current frame */
  if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) return 0; /* Bad alignment, compile time typically */
  ulong true_align = fd_scratch_private_true_align( align ); /* compile time typically */
  ulong smem       = fd_ulong_align_up( fd_scratch_private_free, true_align );
  if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) return 0; /* alignment overflow */
  if( FD_UNLIKELY( smem > fd_scratch_private_stop ) ) return 0; /* insufficient scratch */
  return 1;
}

FD_FN_PURE static inline int
fd_scratch_publish_is_safe( void * _end ) {
  ulong end = (ulong)_end;
# if FD_SCRATCH_USE_HANDHOLDING
  if( FD_UNLIKELY( !fd_scratch_in_prepare ) ) return 0; /* Not in prepare */
# endif
  if( FD_UNLIKELY( end < fd_scratch_private_free ) ) return 0; /* Backward */
  if( FD_UNLIKELY( end > fd_scratch_private_stop ) ) return 0; /* Out of bounds */
  return 1;
}

FD_FN_CONST static inline int
fd_scratch_cancel_is_safe( void ) {
  return 1;
}

FD_FN_PURE static inline int
fd_scratch_alloc_is_safe( ulong align,
                          ulong sz ) {
  if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) return 0; /* No current frame */
  if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) return 0; /* Bad align, compile time typically */
  ulong true_align = fd_scratch_private_true_align( align ); /* compile time typically */
  ulong smem       = fd_ulong_align_up( fd_scratch_private_free, true_align );
  if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) return 0; /* align overflow */
  ulong free = smem + sz;
  if( FD_UNLIKELY( free < smem ) ) return 0; /* sz overflow */
  if( FD_UNLIKELY( free > fd_scratch_private_stop ) ) return 0; /* too little space */
  return 1;
}

FD_FN_PURE static inline int
fd_scratch_trim_is_safe( void * _end ) {
  ulong end = (ulong)_end;
  if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) return 0; /* No current frame */
  if( FD_UNLIKELY( end < fd_scratch_private_frame[ fd_scratch_private_frame_cnt-1UL ] ) ) return 0; /* Trim underflow */
  if( FD_UNLIKELY( end > fd_scratch_private_free ) ) return 0; /* Trim overflow */
  return 1;
}

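/* E.g. to degrade gracefully instead of aborting when scratch is low
   (a sketch; my_fallback_alloc is a hypothetical application supplied
   fallback allocator):

     void * buf;
     if( FD_LIKELY( fd_scratch_alloc_is_safe( align, sz ) ) ) buf = fd_scratch_alloc( align, sz );
     else                                                     buf = my_fallback_alloc( align, sz );
*/
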
/* fd_scratch_vtable is the virtual function table implementing
   fd_valloc for fd_scratch. */

extern const fd_valloc_vtable_t fd_scratch_vtable;

/* fd_scratch_virtual returns an abstract handle to the fd_scratch join.
   Valid for lifetime of scratch frame. fd_valloc_t must be dropped
   before scratch frame changes or scratch detaches. */

FD_FN_CONST static inline fd_valloc_t
fd_scratch_virtual( void ) {
  fd_valloc_t valloc = { NULL, &fd_scratch_vtable };
  return valloc;
}

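/* E.g. passing scratch backed allocation to a fd_valloc consumer
   (a sketch; assumes the fd_valloc_malloc accessor from fd_valloc.h):

     fd_scratch_push();
     fd_valloc_t valloc = fd_scratch_virtual();
     void * mem = fd_valloc_malloc( valloc, 64UL, 1024UL );
     ... use mem here ...
     fd_scratch_pop(); // frees mem; valloc must not be used past this point
*/
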
/* FD_SCRATCH_SCOPE_{BEGIN,END} create a `do { ... } while(0);` scope in
   which a temporary scratch frame is available. Nested scopes are
   permitted. This scratch frame is automatically destroyed when
   exiting the scope normally (e.g. by 'break', 'return', or reaching
   the end). Uses a dummy variable with a cleanup attribute under the
   hood. U.B. if scope is left abnormally (e.g. longjmp(), exception,
   abort(), etc.). Use as follows:

     FD_SCRATCH_SCOPE_BEGIN {
       ...
       fd_scratch_alloc( ... );
       ...
     }
     FD_SCRATCH_SCOPE_END; */

FD_FN_UNUSED static inline void
fd_scratch_scoped_pop_private( void * _unused ) {
  (void)_unused;
  fd_scratch_pop();
}

#define FD_SCRATCH_SCOPE_BEGIN do {                           \
  fd_scratch_push();                                          \
  int __fd_scratch_guard_ ## __LINE__                         \
    __attribute__((cleanup(fd_scratch_scoped_pop_private)))   \
    __attribute__((unused)) = 0;                              \
  do

#define FD_SCRATCH_SCOPE_END while(0); } while(0)

/* fd_alloca is a variant of alloca that works like aligned_alloc. That
   is, it returns an allocation of sz bytes with an alignment of at
   least align. Like alloca, this allocation will be in the stack frame
   of the calling function with a lifetime of until the calling function
   returns. Stack overflow handling is likewise identical to alloca
   (stack overflows will overlap the top stack guard, typically
   triggering a seg fault when the overflow region is touched that will
   be caught and handled by the logger to terminate the calling thread
   group). As such, like alloca, these really should only be used for
   smallish (<< few KiB) quick allocations in bounded recursion depth
   circumstances.

   Like fd_scratch_alloc, align must be 0 or an integer power of 2. 0
   will be treated as align_default. align smaller than align_min will
   be bumped up to align_min.

   The caller promises the request will not overflow the stack. This
   has to be implemented as a macro for linguistic reasons and align
   should be safe against multiple evaluation and, due to compiler
   limitations, must be a compile time constant. Returns non-NULL on
   success and NULL on failure (in most situations, can never fail from
   the caller's POV). sz==0 is okay (and will return non-NULL). */

#if FD_HAS_ALLOCA

/* Work around compiler limitations */
#define FD_SCRATCH_PRIVATE_TRUE_ALIGN( align ) ((align) ? (align) : FD_SCRATCH_ALIGN_DEFAULT)

#define fd_alloca(align,sz) __builtin_alloca_with_align( fd_ulong_max( (sz), 1UL ), \
                                                         8UL*FD_SCRATCH_PRIVATE_TRUE_ALIGN( (align) ) /*bits*/ )

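/* E.g. (a sketch):

     float * tmp = (float *)fd_alloca( 32UL, 8UL*sizeof(float) ); // 32 byte aligned, freed on function return
*/
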
/* fd_alloca_check does fd_alloca but it will FD_LOG_CRIT with a
   detailed message if the request would cause a stack overflow or leave
   so little available free stack that subsequent normal thread
   operations would be at risk.

   Note that returning NULL on failure is not an option as this would no
   longer be a drop-in instrumented replacement for fd_alloca (this
   would also require even more linguistic hacks to keep the fd_alloca
   at the appropriate scope). Likewise, testing that the allocated
   region is within the stack post allocation is not an option as the
   FD_LOG_CRIT invocation would then try to use stack with the already
   overflowed allocation in it (there is no easy portable way to
   guarantee an alloca has been freed short of returning from the
   function in which the alloca was performed). Using FD_LOG_ERR
   instead of FD_LOG_CRIT is a potentially viable alternative error
   handling behavior though.

   This has to be implemented as a macro for linguistic reasons. It is
   recommended this only be used for development / debugging / testing
   purposes (e.g. if you are doing allocas in production that are large
   enough you are worried about stack overflow, you probably should be
   using fd_scratch, fd_alloc or fd_wksp depending on performance and
   persistence needs or, better still, architecting to not need any
   temporary memory allocations at all). If the caller's stack
   diagnostics could not be successfully initialized (this is logged),
   this will always FD_LOG_CRIT. */

#if !FD_HAS_ASAN

extern FD_TL ulong fd_alloca_check_private_sz;

#define fd_alloca_check( align, sz )                                                                             \
  ( fd_alloca_check_private_sz = (sz),                                                                           \
    (__extension__({                                                                                             \
      ulong _fd_alloca_check_private_pad_max   = FD_SCRATCH_PRIVATE_TRUE_ALIGN( (align) ) - 1UL;                 \
      ulong _fd_alloca_check_private_footprint = fd_alloca_check_private_sz + _fd_alloca_check_private_pad_max;  \
      if( FD_UNLIKELY( (_fd_alloca_check_private_footprint < _fd_alloca_check_private_pad_max        ) |         \
                       (_fd_alloca_check_private_footprint > (31UL*(fd_tile_stack_est_free() >> 5))) ) )         \
        FD_LOG_CRIT(( "fd_alloca_check( " #align ", " #sz " ) stack overflow" ));                                \
    })),                                                                                                         \
    fd_alloca( (align), fd_alloca_check_private_sz ) )

#else /* FD_HAS_ASAN */

/* AddressSanitizer provides its own alloca safety instrumentation
   which is more powerful than the above fd_alloca_check heuristics. */

#define fd_alloca_check fd_alloca

#endif /* FD_HAS_ASAN */
#endif /* FD_HAS_ALLOCA */

FD_PROTOTYPES_END

#endif /* HEADER_fd_src_util_scratch_fd_scratch_h */
|