1 : #ifndef HEADER_fd_src_util_scratch_fd_scratch_h
2 : #define HEADER_fd_src_util_scratch_fd_scratch_h
3 :
4 : /* APIs for high performance scratch pad memory allocation. There
5 : are two allocators provided. One is fd_alloca, which is an alignment
6 : aware equivalent of alloca. It is meant for use anywhere alloca
7 : would normally be used. This is only available if the build target
8 : has the FD_HAS_ALLOCA capability. The second is fd_scratch_alloc.
9 : It is meant for use in situations that have very complex and large
10 : temporary memory usage. */
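
/* For example, a minimal sketch contrasting the two (do_work and n are
   hypothetical; assumes the caller is attached to a scratch pad and the
   build target has FD_HAS_ALLOCA):

     void
     do_work( ulong n ) {   // n assumed small and bounded

       // stack lifetime temporary, implicitly freed when do_work returns
       ulong * tmp = (ulong *)fd_alloca( 8UL, n*sizeof(ulong) );

       // scratch frame lifetime temporary, freed at the matching pop
       fd_scratch_push();
       ulong * big = (ulong *)fd_scratch_alloc( 8UL, (n*n)*sizeof(ulong) );
       ... use tmp and big ...
       fd_scratch_pop();
     }
*/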
11 :
12 : #include "../sanitize/fd_sanitize.h"
13 : #include "../tile/fd_tile.h"
14 : #include "../valloc/fd_valloc.h"
15 :
16 : /* FD_SCRATCH_USE_HANDHOLDING: Define this to non-zero at compile time
17 : to turn on additional run-time checks. */
18 :
19 : #ifndef FD_SCRATCH_USE_HANDHOLDING
20 : #if FD_HAS_DEEPASAN
21 : #define FD_SCRATCH_USE_HANDHOLDING 1
22 : #else
23 : #define FD_SCRATCH_USE_HANDHOLDING 0
24 : #endif
25 : #endif
26 :
27 : /* FD_SCRATCH_ALLOC_ALIGN_DEFAULT is the default alignment to use for
28 : allocations.
29 :
30 : Default should be at least 16 for consistent cross platform behavior
31 : that is language conformant across a wide range of targets (i.e. the
32 : largest primitive type across all possible build ... practically
33 : sizeof(int128)). This also naturally covers SSE natural alignment on
34 : x86. 8 could be used if features like int128 and so forth are not
35 : used and still be language conformant (sizeof(ulong) here is the limit).
36 : Likewise, 32, 64, 128 could be used to guarantee all allocations will
37 : have natural AVX/AVX2, natural AVX-512 / cache-line,
38 : adjacent-cache-line-prefetch false sharing avoidance / natural GPU
39 : alignment properties.
40 :
41 : 128 for default was picked as double x86 cache line for ACLPF false
42 : sharing avoidance and for consistency with GPU warp sizes ... i.e.
43 : the default allocation behaviors are naturally interthread
44 : communication false sharing resistant and GPU friendly. This also
45 : naturally covers cases like SSE, AVX, AVX2 and AVX-512. */
46 :
47 14950955 : #define FD_SCRATCH_ALIGN_DEFAULT (128UL) /* integer power-of-2 >=16 */
48 :
49 : /* FD_SCRATCH_{SMEM,FMEM}_ALIGN give the alignment requirements for
50 : the memory regions used as a scratch pad memory. Practically there
51 : are few restrictions on the SMEM alignment other than it being a
52 : reasonable integer power of two. 128 was picked to harmonize with
53 : FD_SCRATCH_ALIGN_DEFAULT (which does have more technical motivations
54 : behind its choice) but this is not strictly required.
55 : FD_SCRATCH_FMEM_ALIGN is required to be sizeof(ulong). */
56 :
57 49155 : #define FD_SCRATCH_SMEM_ALIGN (128UL) /* integer power-of-2, harmonized with ALIGN_DEFAULT */
58 : #define FD_SCRATCH_FMEM_ALIGN (8UL) /* ==sizeof(ulong) but avoids bugs with some compilers */
59 :
60 : FD_PROTOTYPES_BEGIN
61 :
62 : /* Private APIs *******************************************************/
63 :
64 : #if FD_SCRATCH_USE_HANDHOLDING
65 : extern FD_TL int fd_scratch_in_prepare;
66 : #endif
67 :
68 : extern FD_TL ulong fd_scratch_private_start;
69 : extern FD_TL ulong fd_scratch_private_free;
70 : extern FD_TL ulong fd_scratch_private_stop;
71 :
72 : extern FD_TL ulong * fd_scratch_private_frame;
73 : extern FD_TL ulong fd_scratch_private_frame_cnt;
74 : extern FD_TL ulong fd_scratch_private_frame_max;
75 :
76 : FD_FN_CONST static inline int
77 2596157 : fd_scratch_private_align_is_valid( ulong align ) {
78 2596157 : return !(align & (align-1UL)); /* returns true if power of 2 or zero, compile time typically */
79 2596157 : }
80 :
81 : FD_FN_CONST static inline ulong
82 14193345 : fd_scratch_private_true_align( ulong align ) {
83 14193345 : return fd_ulong_if( !align, FD_SCRATCH_ALIGN_DEFAULT, align ); /* compile time typically */
84 14193345 : }
85 :
86 : /* Public APIs ********************************************************/
87 :
88 : /* Constructor APIs */
89 :
90 : /* fd_scratch_smem_{align,footprint} return the alignment and footprint
91 : of a memory region suitable for use as a scratch pad memory that can
92 : hold up to smax bytes. There are very few restrictions on the nature
93 : of this memory. It could even be just a flat address space that is
94 : not backed by an actual physical memory as far as scratch is
95 : concerned. In typical use cases though, the scratch pad memory
96 : should point to a region of huge or gigantic page backed memory on
97 : the caller's numa node.
98 :
99 : A shared memory region is fine for smem. This could be used
100 : for example to allow other threads / processes to access a scratch
101 : allocation from this thread for the lifetime of a scratch allocation.
102 :
103 : Even more generally, a shared memory region for both smem and fmem
104 : could make it theoretically possible to have a scratch pad memory
105 : that is shared across multiple threads / processes. The API is not
106 : well designed for such though (the main reason to use fmem in shared
107 : memory would be convenience and/or adding hot swapping
108 : functionality). In the common scratch scenario, every thread would
109 : attach to their local join of the shared smem and shared fmem. But
110 : since the operations below are not designed to be thread safe, the
111 : threads would have to protect against concurrent use of push and pop
112 : (and attach would probably need to be tweaked to make it easier to
113 : attach to an already in use scratch pad).
114 :
115 : Compile time allocation is possible via the FD_SCRATCH_SMEM_ALIGN
116 : define. E.g.:
117 :
118 : uchar my_smem[ MY_SMAX ] __attribute__((aligned(FD_SCRATCH_SMEM_ALIGN)));
119 :
120 : will be valid to use as a scratch smem with space for up to MY_SMAX
121 : bytes. */
122 :
123 0 : FD_FN_CONST static inline ulong fd_scratch_smem_align( void ) { return FD_SCRATCH_SMEM_ALIGN; }
124 :
125 : FD_FN_CONST static inline ulong
126 49155 : fd_scratch_smem_footprint( ulong smax ) {
127 49155 : return fd_ulong_align_up( smax, FD_SCRATCH_SMEM_ALIGN );
128 49155 : }
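
/* For example, a minimal runtime sketch, using C11 aligned_alloc as a
   stand-in for the huge / gigantic page backed memory a production
   deployment would typically use (my_smax is hypothetical):

     #include <stdlib.h>

     ulong  my_smax = 1UL<<20; // 1 MiB of scratch space
     void * smem    = aligned_alloc( fd_scratch_smem_align(),
                                     fd_scratch_smem_footprint( my_smax ) );
     if( FD_UNLIKELY( !smem ) ) { ... handle allocation failure ... }

   (fd_scratch_smem_footprint returns a multiple of
   fd_scratch_smem_align, as aligned_alloc requires.) */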
129 :
130 : /* fd_scratch_fmem_{align,footprint} return the alignment and footprint
131 : of a memory region suitable for holding the scratch pad memory
132 : metadata (typically very small). The scratch pad memory will be
133 : capable of holding up to depth scratch frames.
134 :
135 : Compile time allocation is possible via the FD_SCRATCH_FMEM_ALIGN
136 : define. E.g.
137 :
138 : ulong my_fmem[ MY_DEPTH ] __attribute__((aligned(FD_SCRATCH_FMEM_ALIGN)));
139 :
140 : or, even simpler:
141 :
142 : ulong my_fmem[ MY_DEPTH ];
143 :
144 : will be valid to use as a scratch fmem with space for up to MY_DEPTH
145 : frames. The attribute variant is not strictly necessary, just for
146 : consistency with the smem above (where it is required). */
147 :
148 0 : FD_FN_CONST static inline ulong fd_scratch_fmem_align ( void ) { return sizeof(ulong); }
149 51 : FD_FN_CONST static inline ulong fd_scratch_fmem_footprint( ulong depth ) { return sizeof(ulong)*depth; }
150 :
151 : /* fd_scratch_attach attaches the calling thread to memory regions
152 : sufficient to hold up to smax (positive) bytes and with up to depth
153 : (positive) frames. smem/fmem should have the required alignment and
154 : footprint specified for smax/depth from the above and be non-NULL.
155 : The caller has a read/write interest in these regions while attached
156 : (and thus the local lifetime of these regions must cover the lifetime
157 : of the attachment). Only one scratch pad memory may be attached to a
158 : caller at a time. This cannot fail from the caller's point of view
159 : (if handholding is enabled, it will abort the caller with a
160 : descriptive error message if used obviously in error). */
161 :
162 : static inline void
163 : fd_scratch_attach( void * smem,
164 : void * fmem,
165 : ulong smax,
166 54 : ulong depth ) {
167 :
168 : # if FD_SCRATCH_USE_HANDHOLDING
169 0 : if( FD_UNLIKELY( fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "already attached" ));
170 0 : if( FD_UNLIKELY( !smem ) ) FD_LOG_ERR(( "bad smem" ));
171 0 : if( FD_UNLIKELY( !fmem ) ) FD_LOG_ERR(( "bad fmem" ));
172 0 : if( FD_UNLIKELY( !smax ) ) FD_LOG_ERR(( "bad smax" ));
173 0 : if( FD_UNLIKELY( !depth ) ) FD_LOG_ERR(( "bad depth" ));
174 0 : fd_scratch_in_prepare = 0;
175 0 : # endif
176 :
177 0 : fd_scratch_private_start = (ulong)smem;
178 0 : fd_scratch_private_free = fd_scratch_private_start;
179 0 : fd_scratch_private_stop = fd_scratch_private_start + smax;
180 :
181 0 : fd_scratch_private_frame = (ulong *)fmem;
182 0 : fd_scratch_private_frame_cnt = 0UL;
183 0 : fd_scratch_private_frame_max = depth;
184 :
185 : # if FD_HAS_DEEPASAN
186 : /* Poison the entire smem region. Underpoison the boundaries to respect
187 : alignment requirements. */
188 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
189 : ulong aligned_end = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
190 : fd_asan_poison( (void*)aligned_start, aligned_end - aligned_start );
191 : # endif
192 : #if FD_HAS_MSAN
193 : /* Mark the entire smem region as uninitialized. */
194 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_MSAN_ALIGN );
195 : ulong aligned_end = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
196 : fd_msan_poison( (void*)aligned_start, aligned_end - aligned_start );
197 : #endif
198 0 : }
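
/* For example, combining the compile time regions described above
   (MY_SMAX and MY_DEPTH are hypothetical compile time constants; note
   that in a multi-threaded use, each thread needs its own smem and
   fmem):

     static uchar my_smem[ MY_SMAX  ] __attribute__((aligned(FD_SCRATCH_SMEM_ALIGN)));
     static ulong my_fmem[ MY_DEPTH ];

     fd_scratch_attach( my_smem, my_fmem, MY_SMAX, MY_DEPTH );
*/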
199 :
200 : /* fd_scratch_detach detaches the calling thread from its current
201 : attachment. Returns smem used on attach and, if opt_fmem is
202 : non-NULL, opt_fmem[0] will contain the fmem used on attach on return.
203 :
204 : This relinquishes the calling thread's read/write interest on these
205 : memory regions. All the caller's scratch frames are popped, any
206 : prepare in progress is canceled and all the caller's scratch
207 : allocations are freed implicitly by this.
208 :
209 : This cannot fail from the caller's point of view (if handholding is
210 : enabled, it will abort the caller with a descriptive error message if
211 : used obviously in error). */
212 :
213 : static inline void *
214 49 : fd_scratch_detach( void ** _opt_fmem ) {
215 :
216 : # if FD_SCRATCH_USE_HANDHOLDING
217 0 : if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
218 0 : fd_scratch_in_prepare = 0;
219 0 : # endif
220 :
221 : # if FD_HAS_DEEPASAN
222 : /* Unpoison the entire scratch space. There should now be an underlying
223 : allocation which has not been poisoned. */
224 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
225 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
226 : fd_asan_unpoison( (void*)aligned_start, aligned_stop - aligned_start );
227 : # endif
228 :
229 0 : void * smem = (void *)fd_scratch_private_start;
230 0 : void * fmem = (void *)fd_scratch_private_frame;
231 :
232 0 : fd_scratch_private_start = 0UL;
233 0 : fd_scratch_private_free = 0UL;
234 0 : fd_scratch_private_stop = 0UL;
235 :
236 0 : fd_scratch_private_frame = NULL;
237 0 : fd_scratch_private_frame_cnt = 0UL;
238 0 : fd_scratch_private_frame_max = 0UL;
239 :
240 49 : if( _opt_fmem ) _opt_fmem[0] = fmem;
241 0 : return smem;
242 49 : }
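
/* For example, unwinding the attach sketched above and recovering both
   regions:

     void * fmem;
     void * smem = fd_scratch_detach( &fmem );
     // smem==my_smem and fmem==my_fmem from the earlier attach
*/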
243 :
244 : /* User APIs */
245 :
246 : /* fd_scratch_{used,free} returns the number of bytes used/free in the
247 : caller's scratch. Returns 0 if not attached. Because of alignment
248 : overheads, an allocation is guaranteed to succeed if free>=sz+align-1
249 : where align is the actual alignment required for the allocation (e.g.
250 : align==0 -> default, align<min -> min). It is guaranteed to fail if
251 : free<sz. It might succeed or fail in between depending on the
252 : alignments of previous allocations. These are freaky fast (O(3)
253 : fast asm operations under the hood). */
254 :
255 9 : static inline ulong fd_scratch_used( void ) { return fd_scratch_private_free - fd_scratch_private_start; }
256 9 : static inline ulong fd_scratch_free( void ) { return fd_scratch_private_stop - fd_scratch_private_free; }
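
/* For example, if fd_scratch_free() returns 200, an allocation with
   align 64 and sz 128 is guaranteed to succeed (200 >= 128+64-1 == 191),
   an allocation with sz 201 is guaranteed to fail (200 < 201), and an
   allocation with align 64 and sz 190 might succeed or fail depending
   on the current alignment of the free pointer. */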
257 :
258 : /* fd_scratch_frame_{used,free} returns the number of scratch frames
259 : used/free in the caller's scratch. Returns 0 if not attached. push
260 : is guaranteed to succeed if free is non-zero and guaranteed to fail
261 : otherwise. pop is guaranteed to succeed if used is non-zero and
262 : guaranteed to fail otherwise. These are freaky fast (O(1-3) fast asm
263 : operations under the hood). */
264 :
265 2954118 : static inline ulong fd_scratch_frame_used( void ) { return fd_scratch_private_frame_cnt; }
266 2999377 : static inline ulong fd_scratch_frame_free( void ) { return fd_scratch_private_frame_max - fd_scratch_private_frame_cnt; }
267 :
268 : /* fd_scratch_reset frees all allocations (if any) and pops all scratch
269 : frames (if any) such that the caller's scratch will be in the same
270 : state it was immediately after attach. The caller must be attached
271 : to a scratch memory to use. This cannot fail from the caller's point
272 : of view (if handholding is enabled, it will abort the caller with a
273 : descriptive error message if used obviously in error). This is
274 : freaky fast (O(3) fast asm operations under the hood). */
275 :
276 : static inline void
277 734 : fd_scratch_reset( void ) {
278 : # if FD_SCRATCH_USE_HANDHOLDING
279 : if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
280 : fd_scratch_in_prepare = 0;
281 : # endif
282 734 : fd_scratch_private_free = fd_scratch_private_start;
283 734 : fd_scratch_private_frame_cnt = 0UL;
284 :
285 : /* Poison entire scratch space again. */
286 : # if FD_HAS_DEEPASAN
287 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_ASAN_ALIGN );
288 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
289 : fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
290 : # endif
291 : # if FD_HAS_MSAN
292 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_start, FD_MSAN_ALIGN );
293 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
294 : fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
295 : # endif
296 734 : }
297 :
298 : /* fd_scratch_push creates a new scratch frame and makes it the current
299 : frame. Assumes caller is attached to a scratch with space for a new
300 : frame. This cannot fail from the caller's point of view (if
301 : handholding is enabled, it will abort the caller with a descriptive
302 : error message if used obviously in error). This is freaky fast (O(5)
303 : fast asm operations under the hood). */
304 :
305 : FD_FN_UNUSED static void /* Work around -Winline */
306 13355427 : fd_scratch_push( void ) {
307 : # if FD_SCRATCH_USE_HANDHOLDING
308 24 : if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) {
309 0 : FD_LOG_ERR(( "not attached" ));
310 0 : }
311 24 : if( FD_UNLIKELY( fd_scratch_private_frame_cnt>=fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "too many frames" ));
312 24 : fd_scratch_in_prepare = 0;
313 24 : # endif
314 24 : fd_scratch_private_frame[ fd_scratch_private_frame_cnt++ ] = fd_scratch_private_free;
315 :
316 : /* Poison to end of scratch region to account for case of in-prep allocation
317 : getting implicitly cancelled. */
318 : # if FD_HAS_DEEPASAN
319 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_ASAN_ALIGN );
320 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
321 : fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
322 : # endif
323 : #if FD_HAS_MSAN
324 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_MSAN_ALIGN );
325 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
326 : fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
327 : #endif
328 24 : }
329 :
330 : /* fd_scratch_pop frees all allocations in the current scratch frame,
331 : destroys the current scratch frame and makes the previous frame (if
332 : there is one) the current scratch frame (and leaves the caller without
333 : a current frame if there is not one). Assumes the caller is attached
334 : to a scratch memory with at least one frame in use. This cannot fail
335 : from the caller's point of view (if handholding is enabled, it will
336 : abort the caller with a descriptive error message if used obviously
337 : in error). This is freaky fast (O(5) fast asm operations under the
338 : hood). */
339 :
340 : FD_FN_UNUSED static void /* Work around -Winline */
341 13998302 : fd_scratch_pop( void ) {
342 : # if FD_SCRATCH_USE_HANDHOLDING
343 24 : if( FD_UNLIKELY( !fd_scratch_private_frame_max ) ) FD_LOG_ERR(( "not attached" ));
344 24 : if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched pop" ));
345 24 : fd_scratch_in_prepare = 0;
346 24 : # endif
347 24 : fd_scratch_private_free = fd_scratch_private_frame[ --fd_scratch_private_frame_cnt ];
348 :
349 : # if FD_HAS_DEEPASAN
350 : /* On a pop() operation, the entire range from fd_scratch_private_free to the
351 : end of the scratch space can be safely poisoned. The region must be aligned
352 : to accommodate asan manual poisoning requirements. */
353 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_ASAN_ALIGN );
354 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
355 : fd_asan_poison( (void*)aligned_start, aligned_stop - aligned_start );
356 : # endif
357 : #if FD_HAS_MSAN
358 : ulong aligned_start = fd_ulong_align_up( fd_scratch_private_free, FD_MSAN_ALIGN );
359 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
360 : fd_msan_poison( (void*)aligned_start, aligned_stop - aligned_start );
361 : #endif
362 24 : }
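
/* For example, a typical matched push / pop bracket (msg_max is
   hypothetical):

     fd_scratch_push();                                      // new frame
     char * buf = (char *)fd_scratch_alloc( 1UL, msg_max );  // frame lifetime
     ... use buf ...
     fd_scratch_pop();                                       // frees buf (and any other allocs in the frame)
*/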
363 :
364 : /* fd_scratch_prepare starts an allocation of unknown size and known
365 : alignment align (0 means use default alignment) in the caller's
366 : current scratch frame. Returns a pointer in the caller's address
367 : space with alignment align to the first byte of a region with
368 : fd_scratch_free() (as observed after this function returns) bytes
369 : available. The caller is free to clobber any bytes in this region.
370 :
371 : fd_scratch_publish finishes an in-progress allocation. end points at
372 : the first byte after the final allocation. Assumes there is a
373 : matching prepare. A published allocation can be subsequently
374 : trimmed.
375 :
376 : fd_scratch_cancel cancels an in-progress allocation. This is a no-op
377 : if there is no matching prepare. If the prepare had alignment other
378 : than 1, it is possible that some alignment padding needed for the
379 : allocation will still be used in the caller's current scratch frame.
380 : If this is not acceptable, the prepare should use an alignment of 1
381 : and manually align the return.
382 :
383 : This allows idioms like:
384 :
385 : uchar * p = (uchar *)fd_scratch_prepare( align );
386 :
387 : if( FD_UNLIKELY( fd_scratch_free() < app_max_sz ) ) {
388 :
389 : fd_scratch_cancel();
390 :
391 : ... handle too little scratch space to handle application
392 : ... worst case needs here
393 :
394 : } else {
395 :
396 : ... populate sz bytes to p where sz is in [0,app_max_sz]
397 : p += sz;
398 :
399 : fd_scratch_publish( p );
400 :
401 : ... at this point, scratch is as though
402 : ... fd_scratch_alloc( align, sz ) was called above
403 :
404 : }
405 :
406 : Ideally, every prepare should be matched with a publish or a cancel;
407 : only one prepare can be in progress at a time on a thread and prepares
408 : cannot be nested. As such, virtually all other scratch operations
409 : will implicitly cancel any in-progress prepare, including attach /
410 : detach / push / pop / prepare / alloc / trim. */
411 :
412 : FD_FN_UNUSED static void * /* Work around -Winline */
413 11595905 : fd_scratch_prepare( ulong align ) {
414 :
415 : # if FD_SCRATCH_USE_HANDHOLDING
416 48 : if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched push" ));
417 48 : if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) FD_LOG_ERR(( "bad align (%lu)", align ));
418 48 : # endif
419 :
420 : # if FD_HAS_DEEPASAN
421 : /* Need 8 byte alignment. */
422 : align = fd_ulong_align_up( align, FD_ASAN_ALIGN );
423 : # endif
424 48 : ulong true_align = fd_scratch_private_true_align( align );
425 48 : ulong smem = fd_ulong_align_up( fd_scratch_private_free, true_align );
426 :
427 : # if FD_SCRATCH_USE_HANDHOLDING
428 48 : if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) FD_LOG_ERR(( "prepare align (%lu) overflow", true_align ));
429 48 : if( FD_UNLIKELY( smem > fd_scratch_private_stop ) ) FD_LOG_ERR(( "prepare align (%lu) needs %lu additional scratch",
430 48 : align, smem - fd_scratch_private_stop ));
431 48 : fd_scratch_in_prepare = 1;
432 48 : # endif
433 :
434 : # if FD_HAS_DEEPASAN
435 : /* At this point the user is able to clobber any bytes in the region. smem is
436 : always going to be at least 8 byte aligned. */
437 : ulong aligned_sz = fd_ulong_align_up( fd_scratch_private_stop - smem, FD_ASAN_ALIGN );
438 : fd_asan_unpoison( (void*)smem, aligned_sz );
439 : # endif
440 :
441 48 : fd_scratch_private_free = smem;
442 48 : return (void *)smem;
443 48 : }
444 :
445 : static inline void
446 11370359 : fd_scratch_publish( void * _end ) {
447 11370359 : ulong end = (ulong)_end;
448 :
449 : # if FD_SCRATCH_USE_HANDHOLDING
450 48 : if( FD_UNLIKELY( !fd_scratch_in_prepare ) ) FD_LOG_ERR(( "unmatched prepare" ));
451 48 : if( FD_UNLIKELY( end < fd_scratch_private_free ) ) FD_LOG_ERR(( "publish underflow" ));
452 48 : if( FD_UNLIKELY( end > fd_scratch_private_stop ) )
453 0 : FD_LOG_ERR(( "publish needs %lu additional scratch", end-fd_scratch_private_stop ));
454 48 : fd_scratch_in_prepare = 0;
455 48 : # endif
456 :
457 : /* Poison everything that is trimmed off. Conservatively poison potentially
458 : less than the region that is trimmed to respect alignment requirements. */
459 : # if FD_HAS_DEEPASAN
460 : ulong aligned_free = fd_ulong_align_dn( fd_scratch_private_free, FD_ASAN_ALIGN );
461 : ulong aligned_end = fd_ulong_align_up( end, FD_ASAN_ALIGN );
462 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
463 : fd_asan_poison( (void*)aligned_end, aligned_stop - aligned_end );
464 : fd_asan_unpoison( (void*)aligned_free, aligned_end - aligned_free );
465 : # endif
466 : # if FD_HAS_MSAN
467 : ulong aligned_free = fd_ulong_align_dn( fd_scratch_private_free, FD_MSAN_ALIGN );
468 : ulong aligned_end = fd_ulong_align_up( end, FD_MSAN_ALIGN );
469 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
470 : fd_msan_poison( (void*)aligned_end, aligned_stop - aligned_end );
471 : fd_msan_unpoison( (void*)aligned_free, aligned_end - aligned_free );
472 : # endif
473 :
474 48 : fd_scratch_private_free = end;
475 48 : }
476 :
477 : static inline void
478 189586 : fd_scratch_cancel( void ) {
479 :
480 : # if FD_SCRATCH_USE_HANDHOLDING
481 : if( FD_UNLIKELY( !fd_scratch_in_prepare ) ) FD_LOG_ERR(( "unmatched prepare" ));
482 : fd_scratch_in_prepare = 0;
483 : # endif
484 :
485 189586 : }
486 :
487 : /* fd_scratch_alloc allocates sz bytes with alignment align in the
488 : caller's current scratch frame. There should be no prepare in
489 : progress. Note that this has the same function signature as
490 : aligned_alloc (and not by accident). It does have some less
491 : restrictive behaviors though.
492 :
493 : align must be 0 or an integer power of 2. 0 will be treated as
494 : FD_SCRATCH_ALIGN_DEFAULT.
495 :
496 : sz need not be a multiple of align. Further, the underlying
497 : allocator does not implicitly round up sz to an align multiple (as
498 : such, scratch can allocate additional items in any tail padding that
499 : might have been implicitly reserved had it rounded up). That is, if
500 : you really want to round up allocations to a multiple of align, then
501 : manually align up sz ... e.g. pass fd_ulong_align_up(sz,align) when
502 : align is non-zero to this call (this could be implemented as a
503 : compile time mode with some small extra overhead if desirable).
504 :
505 : sz 0 is fine. This will currently return a properly aligned non-NULL
506 : pointer (the allocator might do some allocation under the hood to get
507 : the desired alignment and it is possible this might fail ... there is
508 : a case for returning NULL or an arbitrary but appropriately aligned
509 : non-NULL and this could be implemented as a compile time mode with
510 : some small extra overhead if desirable).
511 :
512 : This cannot fail from the caller's point of view (if handholding is
513 : enabled, it will abort the caller with a descriptive error message if
514 : used obviously in error).
515 :
516 : This is freaky fast (O(5) fast asm operations under the hood). */
517 :
518 : FD_FN_UNUSED static void * /* Work around -Winline */
519 : fd_scratch_alloc( ulong align,
520 11026509 : ulong sz ) {
521 11026509 : ulong smem = (ulong)fd_scratch_prepare( align );
522 11026509 : ulong end = smem + sz;
523 :
524 : # if FD_SCRATCH_USE_HANDHOLDING
525 48 : if( FD_UNLIKELY( (end < smem) | (end > fd_scratch_private_stop) ) ) FD_LOG_ERR(( "sz (%lu) overflow", sz ));
526 48 : # endif
527 :
528 48 : fd_scratch_publish( (void *)end );
529 48 : return (void *)smem;
530 11026509 : }
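
/* For example, to get aligned_alloc style tail padding semantics (per
   the note above), round sz up manually:

     void * p = fd_scratch_alloc( 64UL, fd_ulong_align_up( sz, 64UL ) );
*/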
531 :
532 : /* fd_scratch_trim trims the size of the most recent scratch allocation
533 : in the current scratch frame (technically it can be used to trim the
534 : size of the entire current scratch frame but doing more than the most
535 : recent scratch allocation is strongly discouraged). Assumes there is
536 : a current scratch frame and the caller is not in a prepare. end
537 : points at the first byte to free in the most recent scratch
538 : allocation (or the first byte after the most recent scratch
539 : allocation). This allows idioms like:
540 :
541 : uchar * p = (uchar *)fd_scratch_alloc( align, max_sz );
542 :
543 : ... populate sz bytes of p where sz is in [0,max_sz]
544 : p += sz;
545 :
546 : fd_scratch_trim( p );
547 :
548 : ... now the thread's scratch is as though original call was
549 : ... p = fd_scratch_alloc( align, sz );
550 :
551 : This cannot fail from the caller's point of view (if handholding is
552 : enabled, this will abort the caller with a descriptive error message
553 : if used obviously in error).
554 :
555 : Note that an allocation can be repeatedly trimmed.
556 :
557 : Note also that trim can nest. E.g. a thread can call a function that
558 : uses scratch with its own properly matched scratch pushes and pops.
559 : On function return, trim will still work on the most recent scratch
560 : alloc in that frame by the caller.
561 :
562 : This is freaky fast (O(1) fast asm operations under the hood). */
563 :
564 : static inline void
565 757610 : fd_scratch_trim( void * _end ) {
566 757610 : ulong end = (ulong)_end;
567 :
568 : # if FD_SCRATCH_USE_HANDHOLDING
569 : if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) FD_LOG_ERR(( "unmatched push" ));
570 : if( FD_UNLIKELY( end < fd_scratch_private_frame[ fd_scratch_private_frame_cnt-1UL ] ) ) FD_LOG_ERR(( "trim underflow" ));
571 : if( FD_UNLIKELY( end > fd_scratch_private_free ) ) FD_LOG_ERR(( "trim overflow" ));
572 : fd_scratch_in_prepare = 0;
573 : # endif
574 :
575 : # if FD_HAS_DEEPASAN
576 : /* The region to poison should be from _end to the end of the scratch's region.
577 : The same alignment considerations need to be taken into account. */
578 : ulong aligned_end = fd_ulong_align_up( end, FD_ASAN_ALIGN );
579 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_ASAN_ALIGN );
580 : fd_asan_poison( (void*)aligned_end, aligned_stop - aligned_end );
581 : # endif
582 : # if FD_HAS_MSAN
583 : ulong aligned_end = fd_ulong_align_up( end, FD_MSAN_ALIGN );
584 : ulong aligned_stop = fd_ulong_align_dn( fd_scratch_private_stop, FD_MSAN_ALIGN );
585 : fd_msan_poison( (void*)aligned_end, aligned_stop - aligned_end );
586 : # endif
587 :
588 757610 : fd_scratch_private_free = end;
589 757610 : }
590 :
591 : /* fd_scratch_*_is_safe returns false (0) if the operation is obviously
592 : unsafe to do at the time of the call or true otherwise.
593 : Specifically:
594 :
595 : fd_scratch_attach_is_safe() returns 1 if the calling thread is not
596 : already attached to scratch.
597 :
598 : fd_scratch_detach_is_safe() returns 1 if the calling thread is
599 : already attached to scratch.
600 :
601 : fd_scratch_reset_is_safe() returns 1 if the calling thread is already
602 : attached to scratch.
603 :
604 : fd_scratch_push_is_safe() returns 1 if there is at least one frame
605 : available and 0 otherwise.
606 :
607 : fd_scratch_pop_is_safe() returns 1 if there is at least one frame
608 : in use and 0 otherwise.
609 :
610 : fd_scratch_prepare_is_safe( align ) returns 1 if there is a current
611 : frame for the allocation and enough scratch pad memory to start
612 : preparing an allocation with alignment align.
613 :
614 : fd_scratch_publish_is_safe( end ) returns 1 if end is a valid
615 : location to complete an allocation in preparation. If handholding is
616 : enabled, will additionally check that there is a prepare already in
617 : progress.
618 :
619 : fd_scratch_cancel_is_safe() returns 1.
620 :
621 : fd_scratch_alloc_is_safe( align, sz ) returns 1 if there is a current
622 : frame for the allocation and enough scratch pad memory for an
623 : allocation with alignment align and size sz.
624 :
625 : fd_scratch_trim_is_safe( end ) returns 1 if there is a current frame
626 : and that current frame can be trimmed to end safely.
627 :
628 : These are safe to call at any time and are also freaky fast (a
629 : handful of fast asm operations under the hood). */
630 :
631 0 : FD_FN_PURE static inline int fd_scratch_attach_is_safe( void ) { return !fd_scratch_private_frame_max; }
632 0 : FD_FN_PURE static inline int fd_scratch_detach_is_safe( void ) { return !!fd_scratch_private_frame_max; }
633 0 : FD_FN_PURE static inline int fd_scratch_reset_is_safe ( void ) { return !!fd_scratch_private_frame_max; }
634 5998538 : FD_FN_PURE static inline int fd_scratch_push_is_safe ( void ) { return fd_scratch_private_frame_cnt<fd_scratch_private_frame_max; }
635 5907916 : FD_FN_PURE static inline int fd_scratch_pop_is_safe ( void ) { return !!fd_scratch_private_frame_cnt; }
636 :
637 : FD_FN_PURE static inline int
638 0 : fd_scratch_prepare_is_safe( ulong align ) {
639 0 : if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) return 0; /* No current frame */
640 0 : if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) return 0; /* Bad alignment, compile time typically */
641 0 : ulong true_align = fd_scratch_private_true_align( align ); /* compile time typically */
642 0 : ulong smem = fd_ulong_align_up( fd_scratch_private_free, true_align );
643 0 : if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) return 0; /* alignment overflow */
644 0 : if( FD_UNLIKELY( smem > fd_scratch_private_stop ) ) return 0; /* insufficient scratch */
645 0 : return 1;
646 0 : }
647 :
648 : FD_FN_PURE static inline int
649 0 : fd_scratch_publish_is_safe( void * _end ) {
650 0 : ulong end = (ulong)_end;
651 0 : # if FD_SCRATCH_USE_HANDHOLDING
652 0 : if( FD_UNLIKELY( !fd_scratch_in_prepare ) ) return 0; /* Not in prepare */
653 0 : # endif
654 0 : if( FD_UNLIKELY( end < fd_scratch_private_free ) ) return 0; /* Backward */
655 0 : if( FD_UNLIKELY( end > fd_scratch_private_stop ) ) return 0; /* Out of bounds */
656 0 : return 1;
657 0 : }
658 :
659 : FD_FN_CONST static inline int
660 0 : fd_scratch_cancel_is_safe( void ) {
661 0 : return 1;
662 0 : }
663 :
664 : FD_FN_PURE static inline int
665 : fd_scratch_alloc_is_safe( ulong align,
666 2913356 : ulong sz ) {
667 2913356 : if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) return 0; /* No current frame */
668 2596109 : if( FD_UNLIKELY( !fd_scratch_private_align_is_valid( align ) ) ) return 0; /* Bad align, compile time typically */
669 2596109 : ulong true_align = fd_scratch_private_true_align( align ); /* compile time typically */
670 2596109 : ulong smem = fd_ulong_align_up( fd_scratch_private_free, true_align );
671 2596109 : if( FD_UNLIKELY( smem < fd_scratch_private_free ) ) return 0; /* align overflow */
672 2596109 : ulong free = smem + sz;
673 2596109 : if( FD_UNLIKELY( free < smem ) ) return 0; /* sz overflow */
674 2596109 : if( FD_UNLIKELY( free > fd_scratch_private_stop ) ) return 0; /* too little space */
675 757610 : return 1;
676 2596109 : }
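
/* For example, a non-aborting allocation pattern (sketch):

     void * p = NULL;
     if( FD_LIKELY( fd_scratch_alloc_is_safe( align, sz ) ) ) p = fd_scratch_alloc( align, sz );
     else { ... fall back on a different allocator or a smaller sz ... }
*/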
677 :
678 : FD_FN_PURE static inline int
679 0 : fd_scratch_trim_is_safe( void * _end ) {
680 0 : ulong end = (ulong)_end;
681 0 : if( FD_UNLIKELY( !fd_scratch_private_frame_cnt ) ) return 0; /* No current frame */
682 0 : if( FD_UNLIKELY( end < fd_scratch_private_frame[ fd_scratch_private_frame_cnt-1UL ] ) ) return 0; /* Trim underflow */
683 0 : if( FD_UNLIKELY( end > fd_scratch_private_free ) ) return 0; /* Trim overflow */
684 0 : return 1;
685 0 : }
686 :
687 : /* fd_scratch_vtable is the virtual function table implementing
688 : fd_valloc for fd_scratch. */
689 :
690 : extern const fd_valloc_vtable_t fd_scratch_vtable;
691 :
692 : /* fd_scratch_virtual returns an abstract handle to the fd_scratch join.
693 : Valid for lifetime of scratch frame. fd_valloc_t must be dropped
694 : before scratch frame changes or scratch detaches. */
695 :
696 : FD_FN_CONST static inline fd_valloc_t
697 0 : fd_scratch_virtual( void ) {
698 0 : fd_valloc_t valloc = { NULL, &fd_scratch_vtable };
699 0 : return valloc;
700 0 : }
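
/* For example, passing scratch to an API that takes a generic
   fd_valloc_t (sketch, using the fd_valloc_malloc / fd_valloc_free
   wrappers from fd_valloc.h; for scratch, individual frees are
   effectively no-ops as memory is reclaimed by pop / reset):

     fd_valloc_t valloc = fd_scratch_virtual();
     void * p = fd_valloc_malloc( valloc, align, sz );
     ... use p ...
     fd_valloc_free( valloc, p );
*/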
701 :
702 : /* FD_SCRATCH_SCOPE_{BEGIN,END} create a `do { ... } while(0);` scope in
703 : which a temporary scratch frame is available. Nested scopes are
704 : permitted. This scratch frame is automatically destroyed when
705 : exiting the scope normally (e.g. by 'break', 'return', or reaching
706 : the end). Uses a dummy variable with a cleanup attribute under the
707 : hood. U.B. if scope is left abnormally (e.g. longjmp(), exception,
708 : abort(), etc.). Use as follows:
709 :
710 : FD_SCRATCH_SCOPE_BEGIN {
711 : ...
712 : fd_scratch_alloc( ... );
713 : ...
714 : }
715 : FD_SCRATCH_SCOPE_END; */
716 :
717 : FD_FN_UNUSED static inline void
718 79 : fd_scratch_scoped_pop_private( void * _unused ) {
719 79 : (void)_unused;
720 79 : fd_scratch_pop();
721 79 : }
722 :
723 79 : #define FD_SCRATCH_SCOPE_BEGIN do { \
724 79 : fd_scratch_push(); \
725 79 : int __fd_scratch_guard_ ## __LINE__ \
726 79 : __attribute__((cleanup(fd_scratch_scoped_pop_private))) \
727 79 : __attribute__((unused)) = 0; \
728 79 : do
729 :
730 79 : #define FD_SCRATCH_SCOPE_END while(0); } while(0)
731 :
732 : /* fd_alloca is variant of alloca that works like aligned_alloc. That
733 : is, it returns an allocation of sz bytes with an alignment of at
734 : least align. Like alloca, this allocation will be in the stack frame
735 : of the calling function with a lifetime of until the calling function
736 : returns. Stack overflow handling is likewise identical to alloca
737 : (stack overflows will overlap the top stack guard, typically
738 : triggering a seg fault when the overflow region is touched; this will
739 : be caught and handled by the logger to terminate the calling thread
740 : group). As such, like alloca, these really should only be used for
741 : smallish (<< few KiB) quick allocations in bounded recursion depth
742 : circumstances.
743 :
744 : Like fd_scratch_alloc, align must be 0 or an integer power of 2. 0
745 : will be treated as FD_SCRATCH_ALIGN_DEFAULT. align smaller than
746 : align_min will be bumped up to align_min.
747 :
748 : The caller promises the request will not overflow the stack. This has
749 : be implemented as a macro for linguistic reasons and align should be
750 : safe against multiple evaluation and, due to compiler limitations,
751 : must be a compile time constant. Returns non-NULL on success and
752 : NULL on failure (in most situations, can never fail from the caller's
753 : POV). sz==0 is okay (and will return non-NULL). */
754 :
755 : #if FD_HAS_ALLOCA
756 :
757 : /* Work around compiler limitations */
758 33 : #define FD_SCRATCH_PRIVATE_TRUE_ALIGN( align ) ((align) ? (align) : FD_SCRATCH_ALIGN_DEFAULT)
759 :
760 18 : #define fd_alloca(align,sz) __builtin_alloca_with_align( fd_ulong_max( (sz), 1UL ), \
761 18 : 8UL*FD_SCRATCH_PRIVATE_TRUE_ALIGN( (align) ) /*bits*/ )
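
/* For example, a small bounded stack lifetime temporary (n is
   hypothetical and assumed smallish per the above; fd_alloca_check
   below is a drop-in instrumented replacement):

     double * tmp = (double *)fd_alloca( 8UL, n*sizeof(double) );
*/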
762 :
763 : /* fd_alloca_check does fd_alloca but it will FD_LOG_CRIT with a
764 : detailed message if the request would cause a stack overflow or leave
765 : so little available free stack that subsequent normal thread
766 : operations would be at risk.
767 :
768 : Note that returning NULL on failure is not an option as this would no
769 : longer be a drop-in instrumented replacement for fd_alloca (this
770 : would also require even more linguistic hacks to keep the fd_alloca
771 : at the appropriate scope). Likewise, testing the allocated region is
772 : within the stack post allocation is not an option as the FD_LOG_CRIT
773 : invocation would then try to use stack with the already overflowed
774 : allocation in it (there is no easy portable way to guarantee an
775 : alloca has been freed short of returning from the function in which
776 : the alloca was performed). Using FD_LOG_ERR instead of FD_LOG_CRIT
777 : is a potentially viable alternative error handling behavior though.
778 :
779 : This has to be implemented as a macro for linguistic reasons. It is
780 : recommended this only be used for development / debugging / testing
781 : purposes (e.g. if you are doing alloca in production that are large
782 : enough you are worried about stack overflow, you probably should be
783 : using fd_scratch, fd_alloc or fd_wksp depending on performance and
784 : persistence needs or, better still, architecting to not need any
785 : temporary memory allocations at all). If the caller's stack
786 : diagnostics could not be successfully initialized (this is logged),
787 : this will always FD_LOG_CRIT. */
788 :
789 : #if !FD_HAS_ASAN
790 :
791 : extern FD_TL ulong fd_alloca_check_private_sz;
792 :
793 : #define fd_alloca_check( align, sz ) \
794 15 : ( fd_alloca_check_private_sz = (sz), \
795 15 : (__extension__({ \
796 15 : ulong _fd_alloca_check_private_pad_max = FD_SCRATCH_PRIVATE_TRUE_ALIGN( (align) ) - 1UL; \
797 15 : ulong _fd_alloca_check_private_footprint = fd_alloca_check_private_sz + _fd_alloca_check_private_pad_max; \
798 15 : if( FD_UNLIKELY( (_fd_alloca_check_private_footprint < _fd_alloca_check_private_pad_max ) | \
799 15 : (_fd_alloca_check_private_footprint > (31UL*(fd_tile_stack_est_free() >> 5))) ) ) \
800 15 : FD_LOG_CRIT(( "fd_alloca_check( " #align ", " #sz " ) stack overflow" )); \
801 15 : })), \
802 15 : fd_alloca( (align), fd_alloca_check_private_sz ) )
803 :
804 : #else /* FD_HAS_ASAN */
805 :
806 : /* AddressSanitizer provides its own alloca safety instrumentation
807 : which is more powerful than the above fd_alloca_check heuristics. */
808 :
809 : #define fd_alloca_check fd_alloca
810 :
811 : #endif /* FD_HAS_ASAN */
812 : #endif /* FD_HAS_ALLOCA */
813 :
814 : FD_PROTOTYPES_END
815 :
816 : #endif /* HEADER_fd_src_util_scratch_fd_scratch_h */
|