Line data Source code
1 : #ifndef HEADER_fd_src_util_wksp_fd_wksp_private_h
2 : #define HEADER_fd_src_util_wksp_fd_wksp_private_h
3 :
4 : #include "fd_wksp.h"
5 :
6 : /* If FD_WKSP_LOCK_RECLAIM==0, do not try to recover the lock from
7 : dead processes. This is useful, for example, if we know that the
8 : lock will not get acquired by another process, or that if another
9 : acquiring process dies that all potential users will get exited. It
10 : prevents a syscall on various common workspace paths (eg, alloc). */
11 :
12 : #ifndef FD_WKSP_LOCK_RECLAIM
13 : #define FD_WKSP_LOCK_RECLAIM 0
14 : #endif
15 :
16 : /* FD_WKSP_PRIVATE_PINFO_IDX_NULL is the pinfo index value used to
17 : indicate NULL */
18 :
19 25985572029 : #define FD_WKSP_PRIVATE_PINFO_IDX_NULL ((ulong)UINT_MAX)
20 :
21 : /* A fd_wksp_private_pinfo_t specifies details about a partition in a
22 : workspace and its relationship to other partitions in that workspace.
23 :
24 : FD_WKSP_PRIVATE_PINFO_ALIGN is an integer power of 2 and
25 : FD_WKSP_PRIVATE_PINFO_FOOTPRINT will be a multiple of align.
26 :
27 : If a partition is not idle:
28 :
29 : - [gaddr_lo,gaddr_hi) specify the range of offsets covered by this
30 : partition.
31 :
32 : wksp_gaddr_lo <= gaddr_lo < gaddr_hi <= wksp_gaddr_hi
33 :
34 : such that partitions always have at least 1 byte and are contained
35 : within the workspace's data region.
36 :
37 : - tag==0 indicates this partition is space in the workspace free
38 : for use and the partition is in the free treap, fast findable by
39 : its size. Otherwise, this partition is allocated and the partition
40 : is in the used treap, fast O(1) findable by any in range gaddr.
41 :
42 : - prev_cidx, next_cidx give the index (in compressed form) of the
43 : previous / next partition if present or IDX_NULL if this is the
44 : workspace partition head / tail partition (in which case gaddr_lo /
45 : gaddr_hi will be wksp->gaddr_lo / wksp->gaddr_hi). That is, for
46 : partition idx where idx is in [0,wksp->part_max):
47 :
48 : ulong prev_idx = fd_wksp_private_pinfo_idx( pinfo[ idx ].prev_cidx );
49 : if( fd_wksp_private_pinfo_idx_is_null( prev_idx ) ) {
50 : ... at this point:
51 : ... idx == fd_wksp_private_pinfo_idx( wksp->part_head_cidx ) );
52 : ... pinfo[ idx ].gaddr_lo == wksp->gaddr_lo );
53 : } else {
54 : ... at this point:
55 : ... idx ==_fd_wksp_private_pinfo_idx( pinfo[ prev_idx ].next_cidx );
56 : ... pinfo[ idx ].gaddr_lo == pinfo[ prev_idx ].gaddr_hi;
57 : }
58 :
59 : and:
60 :
61 : ulong next_idx = fd_wksp_private_pinfo_idx( pinfo[ idx ].next_cidx );
62 : if( fd_wksp_private_pinfo_idx_is_null( next_idx ) ) {
63 : ... at this point:
64 : ... idx == fd_wksp_private_pinfo_idx( wksp->part_tail_cidx ) );
65 : ... pinfo[ idx ].gaddr_hi == wksp->gaddr_hi );
66 : } else {
67 : ... at this point:
68 : ... idx ==_fd_wksp_private_pinfo_idx( pinfo[ next_idx ].prev_cidx );
69 : ... pinfo[ idx ].gaddr_hi == pinfo[ next_idx ].gaddr_lo;
70 : }
71 :
72 : - If partition idx is in the used treap, {left,right}_cidx specify
73 : the partition indices of the root of the left and right subtrees.
74 :
75 : The used treap obeys the binary search tree property that all
76 : partitions in the left/right subtree (if any) cover a range of
77 : offsets strictly lower/higher than range covered by idx.
78 :
79 : parent_cidx specifies idx's parent tree (if any). If idx is the
80 : wksp used tree root, parent_cidx will specify IDX_NULL and
81 : wksp->part_used_cidx will specify idx.
82 :
83 : The used treap also obeys the heap property to make it well
84 : balanced on average. Specifically, the idx's parent's heap
85 : priority will be at least idx's heap priority.
86 :
87 : in_same will be 0 and same_cidx will specify IDX_NULL as no used
88 : partitions can overlap.
89 :
90 : - If partition idx is in the free treap, if partition idx is not
91 : in a list of same sized partitions, in_same will be 0 and
92 : {left,right}_cidx specify the partition indices of the root of the
93 : left and right subtrees.
94 :
95 : The free treap obeys the binary search tree property that all
96 : partitions in the left/right subtree (if any) have partition sizes
97 : strictly lower/higher than partition idx's size.
98 :
99 : parent_cidx specifies idx's parent tree (if any). If idx is the
100 : wksp free tree root, parent_cidx will specify IDX_NULL and
101 : wksp->part_free_cidx will specify idx.
102 :
103 : The free treap also obeys the heap property to make it well
104 : balanced on average. Specifically, the idx's parent's heap
105 : priority will be at least idx's heap priority.
106 :
107 : If there are additional partitions of the same size to partition
108 : idx, same_cidx will refer to the next partition of the same size.
109 :
110 : If partition idx is in a list of same sized partitions, in_same
111 : will be 1 and parent_cidx / same_cidx will specify the prev / next
112 : index of additional partitions of the same size. same_cidx will
113 : specify IDX_NULL if no more.
114 :
115 : - heap_prio is a random value used as described above.
116 :
117 : - stack_cidx and cycle_tag are for internal use */
118 :
119 : /* TODO: Consider align 32/ footprint 96 without compressed indices if
120 : ever needing more than ~4B partitions. */
121 :
122 : #define FD_WKSP_PRIVATE_PINFO_ALIGN (64UL) /* At most FD_WKSP_ALIGN */
123 : #define FD_WKSP_PRIVATE_PINFO_FOOTPRINT (64UL)
124 :
125 : struct __attribute__((aligned(FD_WKSP_PRIVATE_PINFO_ALIGN))) fd_wksp_private_pinfo {
126 : ulong gaddr_lo; /* If in idle stack, 0 */
127 : ulong gaddr_hi; /* ", 0 */
128 : ulong tag; /* ", 0 */
129 : uint heap_prio : 31; /* 30 bit priority and 1 bit free to use for infinite priority bulk tree ops */
130 : uint in_same : 1; /* 1 if in a same list and 0 otherwise */
131 : uint prev_cidx; /* ", fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
132 : uint next_cidx; /* ", fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
133 : uint left_cidx; /* ", fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
134 : uint right_cidx; /* ", fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
135 : uint parent_cidx; /* ", cidx of next idle or fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) if no more */
136 : uint same_cidx; /* ", fd_wksp_private_pinfo_cidx( FD_WKSP_INFO_IDX_NULL ) */
137 : uint stack_cidx; /* internal use */
138 : ulong cycle_tag; /* internal use */
139 : };
140 :
141 : typedef struct fd_wksp_private_pinfo fd_wksp_private_pinfo_t;
142 :
143 : /* FD_WKSP_MAGIC is an ideally unique number that specifies the precise
144 : memory layout of a fd_wksp. */
145 :
146 525 : #define FD_WKSP_MAGIC (0xF17EDA2C3731C591UL) /* F17E=FIRE,DA2C/3R<>DANCER,31/C59<>WKSP,0<>0 --> FIRE DANCER WKSP VERSION 1 */
147 :
148 : /* fd_wksp_private specifies the detailed layout of the internals of a
149 : fd_wksp_t */
150 :
151 : struct fd_wksp_private {
152 :
153 : /* This point is FD_WKSP_ALIGN aligned */
154 :
155 : /* This fields are static and mostly in the first cache line */
156 :
157 : ulong magic; /* ==FD_WKSP_MAGIC */
158 : ulong part_max; /* Max wksp partitions */
159 : ulong data_max; /* Data region */
160 : ulong gaddr_lo; /* ==fd_wksp_private_data_off( part_max ), data region covers offsets [gaddr_lo,gaddr_hi) */
161 : ulong gaddr_hi; /* ==gaddr_lo + data_max, offset gaddr_hi is to 1 byte footer */
162 : char name[ FD_SHMEM_NAME_MAX ]; /* (Convenience) backing fd_shmem region cstr name */
163 : uint seed; /* Heap priority random number seed, arbitrary */
164 :
165 : /* These fields are dynamic and in the adjacent cache line */
166 :
167 : uint idle_top_cidx; /* Stack of partition infos not in use, parent_idx is next pointer */
168 : uint part_head_cidx; /* Index for info about the leftmost partition */
169 : uint part_tail_cidx; /* Index for info about the rightmost partition */
170 : uint part_used_cidx; /* Treap of partitions that are currently used (tag!=0), searchable by gaddr */
171 : uint part_free_cidx; /* Treap of partitions that are currently free (tag==0), searchable by size */
172 : ulong cycle_tag; /* Used for cycle detection */
173 : ulong owner; /* thread group id of the owner or NULL otherwise */
174 :
175 : /* IMPORTANT! The "single-source-of-truth" for what is currently
176 : used (and its tags) is the set of non-zero tagged partitions in the
177 : partition info array. The idle stack, partition list, used treap
178 : and free treap are auxiliary data structuring that can be
179 : reconstructed at any time from this single source of truth.
180 :
181 : Conversely, if there accidental or deliberate data corruption of
182 : the wksp metadata resulting in a conflict between what is stored
183 : in the partition info array and the auxiliary data structures,
184 : the partition info array governs. */
185 :
186 : /* Padding to FD_WKSP_PRIVATE_PINFO_ALIGN here */
187 :
188 : /* part_max pinfo here */
189 : /* data_max byte data region here */
190 : /* 1 footer byte here */
191 : /* Padding to FD_WKSP_ALIGN here */
192 : };
193 :
194 : FD_PROTOTYPES_BEGIN
195 :
196 : /* fd_wksp_private_pinfo_sz returns the size of a partition in bytes.
197 : Assumes pinfo points to the pinfo of a partition in a current local
198 : join. Will be positive. */
199 :
200 : FD_FN_PURE static inline ulong
201 7118049648 : fd_wksp_private_pinfo_sz( fd_wksp_private_pinfo_t const * pinfo ) {
202 7118049648 : return pinfo->gaddr_hi - pinfo->gaddr_lo;
203 7118049648 : }
204 :
205 : /* fd_wksp_private_{part,data}_off return the wksp offset of the
206 : pinfo array and the data region. data_off assumes part_max is a
207 : value that will not overflow. */
208 :
209 : FD_FN_CONST static inline ulong
210 0 : fd_wksp_private_pinfo_off( void ) {
211 0 : return 128UL; /* fd_ulong_align_up( sizeof(fd_wksp_t), FD_WKSP_PRIVATE_PINFO_ALIGN ); */
212 0 : }
213 :
214 : FD_FN_CONST static inline ulong
215 4662 : fd_wksp_private_data_off( ulong part_max ) {
216 4662 : return fd_wksp_private_pinfo_off() + part_max*sizeof(fd_wksp_private_pinfo_t);
217 4662 : }
218 :
219 : /* fd_wksp_private_pinfo returns the location of wksp pinfo array in the
220 : caller's address space. Assumes wksp is a current local join.
221 : fd_wksp_private_pinfo_const is a const-correct version. */
222 :
223 : FD_FN_CONST static inline fd_wksp_private_pinfo_t *
224 564334383 : fd_wksp_private_pinfo( fd_wksp_t * wksp ) {
225 564334383 : return (fd_wksp_private_pinfo_t *)(((ulong)wksp) + fd_wksp_private_pinfo_off());
226 564334383 : }
227 :
228 : FD_FN_CONST static inline fd_wksp_private_pinfo_t const *
229 0 : fd_wksp_private_pinfo_const( fd_wksp_t const * wksp ) {
230 0 : return (fd_wksp_private_pinfo_t const *)(((ulong)wksp) + fd_wksp_private_pinfo_off());
231 0 : }
232 :
233 : /* fd_wksp_private_pinfo_{cidx,idx} compresses / uncompresses a pinfo index */
234 :
235 8355794454 : static inline uint fd_wksp_private_pinfo_cidx( ulong idx ) { return (uint) idx; }
236 17115923196 : static inline ulong fd_wksp_private_pinfo_idx ( uint cidx ) { return (ulong)cidx; }
237 :
238 : /* fd_wksp_private_pinfo_idx_is_null returns 1 if idx is
239 : FD_WKSP_PRIVATE_PINFO_IDX_NULL and 0 otherwise */
240 :
241 22121799465 : static inline int fd_wksp_private_pinfo_idx_is_null( ulong idx ) { return idx==FD_WKSP_PRIVATE_PINFO_IDX_NULL; }
242 :
243 : /* pinfo idle stack APIs **********************************************/
244 :
245 : /* fd_wksp_private_idle_stack_is_empty returns 1 if there are no idle
246 : partitions and 0 otherwise. Also returns 1 if corruption is
247 : detected. Assumes wksp is a current local join. */
248 :
249 : static inline int
250 114259155 : fd_wksp_private_idle_stack_is_empty( fd_wksp_t * wksp ) {
251 114259155 : return fd_wksp_private_pinfo_idx( wksp->idle_top_cidx ) >= wksp->part_max;
252 114259155 : }
253 :
254 : /* fd_wksp_private_idle_stack_pop pops an idle partition off wksp's idle
255 : stack. Assumes the caller knows idle stack is not empty. The caller
256 : is promised that the popped partition has [gaddr_lo,gaddr_hi) = [0,0)
257 : tag 0, {prev, next, left, right, same, parent}_cidx specify IDX_NULL.
258 : Further, heap_prio should have been assigned a random value.
259 : stack_idx and cycle_tag are for internal use. */
260 :
261 : static inline ulong /* Assumes in [0,part_max) */
262 : fd_wksp_private_idle_stack_pop( fd_wksp_t * wksp, /* Assumes current local join */
263 112699329 : fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
264 112699329 : ulong i = fd_wksp_private_pinfo_idx( wksp->idle_top_cidx );
265 : # if FD_HAS_DEEPASAN
266 : fd_asan_unpoison( &pinfo[ i ], FD_WKSP_PRIVATE_PINFO_FOOTPRINT );
267 : # endif
268 112699329 : wksp->idle_top_cidx = pinfo[ i ].parent_cidx;
269 112699329 : pinfo[ i ].parent_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
270 112699329 : return i;
271 112699329 : }
272 :
273 : /* fd_wksp_private_idle_stack_push pushes partition i onto the idle
274 : stack. Assumes the caller knows i is not currently in the idle
275 : stack, partitioning, used treap or free treap. */
276 :
277 : static inline void
278 : fd_wksp_private_idle_stack_push( ulong i, /* Assumes in [0,part_max) */
279 : fd_wksp_t * wksp, /* Assumes current local join */
280 124465419 : fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
281 124465419 : pinfo[ i ].gaddr_lo = 0UL;
282 124465419 : pinfo[ i ].gaddr_hi = 0UL;
283 124465419 : pinfo[ i ].tag = 0U;
284 124465419 : pinfo[ i ].in_same = 0U;
285 124465419 : pinfo[ i ].prev_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
286 124465419 : pinfo[ i ].next_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
287 124465419 : pinfo[ i ].left_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
288 124465419 : pinfo[ i ].right_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
289 124465419 : pinfo[ i ].same_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
290 124465419 : pinfo[ i ].parent_cidx = wksp->idle_top_cidx;
291 124465419 : wksp->idle_top_cidx = fd_wksp_private_pinfo_cidx( i );
292 :
293 : # if FD_HAS_DEEPASAN
294 : fd_asan_poison( &pinfo[ i ], FD_WKSP_PRIVATE_PINFO_FOOTPRINT );
295 : # endif
296 124465419 : }
297 :
298 : /* pinfo used treap APIs **********************************************/
299 :
300 : /* fd_wksp_private_used_treap_query queries wksp's used treap for the
301 : used partition that holds gaddr. On success, returns the requested
302 : partition idx, in [0,part_max), and, on failure, returns IDX_NULL.
303 : Reasons for failure include gaddr is not in a used partition and
304 : internal treap corruption detected. Might consume a wksp cycle tag
305 : and clobber partition cycle tags. Reasonably fast O(lg N) where N is
306 : the number of used partitions. */
307 :
308 : ulong
309 : fd_wksp_private_used_treap_query( ulong gaddr,
310 : fd_wksp_t * wksp,
311 : fd_wksp_private_pinfo_t * pinfo );
312 :
313 : /* fd_wksp_private_used_treap_insert inserts partition n into wksp's
314 : used treap. Assumes n is not in the idle stack, used treap or free
315 : treap. Does not care if n is in the partitioning or not. Reasonably
316 : fast O(lg N) where N is the number of used partitions.
317 :
318 : Partition n should have [gaddr_lo,gaddr_hi) and heap_prio initialized
319 : on entry (heap_prio should be a random value). tag need not be
320 : initialized but it is assumed that the caller will set the tag to its
321 : final value on success to make the partition officially used. This
322 : will initialize {in_same, left, right, same, parent}_cidx. This will
323 : ignore {prev,next}_cidx. This might consume a wksp cycle tag and
324 : clobber partition stack_cidx and cycle_tag fields.
325 :
326 : Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
327 : (negative) on failure (logs details for failure). Reasons for
328 : failure include n is not in [0,part_max), n's range is not in wksp
329 : data region, n was detected as already inserted (this detection is
330 : not guaranteed), treap internal connectivity issues were detected
331 : (complete detection not guaranteed), and n overlaps with at least one
332 : element already inserted into the treap.
333 :
334 : On failure n and the treap itself were not modified (except possibly
335 : clobbering of stack_cidx and cycle_tag). Note that failure reasons
336 : are either user error or memory corruption. This cannot fail in
337 : normal operating circumstances. */
338 :
339 : int
340 : fd_wksp_private_used_treap_insert( ulong n,
341 : fd_wksp_t * wksp, /* Assumes current local join */
342 : fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
343 :
344 : /* fd_wksp_private_used_treap_remove removes partition d from wksp's
345 : used treap. Assumes d in the used treap, not in the free treap, not
346 : in the idle stack. Does not care if d is in the partitioning.
347 : Reasonably fast O(lg N) where N is the number of used partitions.
348 : This might consume a wksp cycle tag and clobber partition stack_cidx
349 : and cycle_tag fields.
350 :
351 : Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
352 : (negative) on failure (logs details for failure). Reasons for
353 : failure include d is not in [0,part_max) and treap internal
354 : connectivity issues were detected (complete detection not
355 : guaranteed).
356 :
357 : Note that failure reasons are either user error or memory corruption.
358 : This cannot fail in normal operating circumstances. */
359 :
360 : int
361 : fd_wksp_private_used_treap_remove( ulong d,
362 : fd_wksp_t * wksp, /* Assumes current local join */
363 : fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
364 :
365 : /* pinfo free treap APIs **********************************************/
366 :
367 : /* fd_wksp_private_free_treap_query queries wksp's free treap for the
368 : smallest partition of at least sz. On success, returns the index of
369 : a partition in the free treap suitable for sz, in [0,part_max), and,
370 : on failure, returns IDX_NULL. Reasons for failure include sz zero,
371 : sz is larger than any free partition, and internal treap corruption
372 : was detected. Might consume a wksp cycle tag and clobber partition
373 : cycle tags. Reasonably fast O(lg N) where N is the number of used
374 : partitions. */
375 :
376 : ulong
377 : fd_wksp_private_free_treap_query( ulong sz,
378 : fd_wksp_t * wksp, /* Assumes current local join */
379 : fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
380 :
381 : /* fd_wksp_private_free_treap_insert inserts partition n into wksp's
382 : free treap. Assumes n is not in the idle stack, used treap or free
383 : treap. Does not care if n is in the partitioning or not. Reasonably
384 : fast O(lg N) where N is the number of partitions in the free treap.
385 :
386 : Partition n should have [gaddr_lo,gaddr_hi) and heap_prio initialized
387 : on entry (heap_prio should be a random value). tag need not be
388 : initialized but it is assumed that the caller will zero the tag
389 : beforehand to make the partition officially free. This will
390 : initialize {in_same, left, right, same, parent}_cidx. This will
391 : ignore {prev,next}_cidx. This might consume a wksp cycle tag and
392 : clobber the partition stack_cidx and cycle_tag fields.
393 :
394 : Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
395 : (negative) on failure (logs details for failure). Reasons for
396 : failure include n is not in [0,part_max), n's range is not in wksp
397 : data region, n's tag is not zero, n was detected as already inserted
398 : (this detection is not guaranteed), treap internal connectivity
399 : issues were detected (complete detection not guaranteed).
400 :
401 : If n's size exactly matches the size of partition already in the
402 : treap, n will be pushed onto that partition's same stack rather than
403 : inserted into the treap.
404 :
405 : On failure n and the treap itself were not modified (except possibly
406 : clobbering of stack_cidx and cycle_tag). Note that failures reasons
407 : are either user error or memory corruption. This has no failures in
408 : normal operating circumstances. */
409 :
410 : int
411 : fd_wksp_private_free_treap_insert( ulong n,
412 : fd_wksp_t * wksp, /* Assumes current local join */
413 : fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
414 :
415 : /* fd_wksp_private_free_treap_same_is_empty returns 1 if the same list
416 : for d is empty and 0 if not. Returns 1 if corruption in detected.
417 : Assumes d is in the free treap. */
418 :
419 : static inline int
420 : fd_wksp_private_free_treap_same_is_empty( ulong d,
421 : fd_wksp_t * wksp, /* Assumes current local join */
422 78568587 : fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
423 78568587 : ulong part_max = wksp->part_max;
424 78568587 : return fd_wksp_private_pinfo_idx( pinfo[ d ].same_cidx )>=part_max;
425 78568587 : }
426 :
427 : /* fd_wksp_private_free_treap_same_remove removes the first partition
428 : from d's same list. Assumes the caller knows d's same list is not
429 : empty. The caller is promised that returned partition has the same
430 : size as d. */
431 :
432 : static inline ulong
433 : fd_wksp_private_free_treap_same_remove( ulong d,
434 : fd_wksp_t * wksp, /* Assumes current local join */
435 3750339 : fd_wksp_private_pinfo_t * pinfo ) { /* == fd_wksp_private_pinfo( wksp ) */
436 3750339 : ulong part_max = wksp->part_max;
437 3750339 : ulong i = fd_wksp_private_pinfo_idx( pinfo[ d ].same_cidx );
438 3750339 : ulong j = fd_wksp_private_pinfo_idx( pinfo[ i ].same_cidx );
439 3750339 : /**/ pinfo[ d ].same_cidx = fd_wksp_private_pinfo_cidx( j );
440 3750339 : if( j<part_max ) pinfo[ j ].parent_cidx = fd_wksp_private_pinfo_cidx( d );
441 3750339 : pinfo[ i ].in_same = 0U;
442 3750339 : pinfo[ i ].same_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
443 3750339 : pinfo[ i ].parent_cidx = fd_wksp_private_pinfo_cidx( FD_WKSP_PRIVATE_PINFO_IDX_NULL );
444 3750339 : return i;
445 3750339 : }
446 :
447 : /* fd_wksp_private_free_treap_remove removes partition d from wksp's
448 : free treap. Assumes d in the free treap, not in the used treap, not
449 : in the idle stack. Does not care if d is in the partitioning.
450 : Reasonably fast O(lg N) where N is the number of free partitions.
451 : This might consume a wksp cycle tag and clobber partition stack_cidx
452 : and cycle_tag fields. There is an edge case where d's can be swapped
453 : with another same sized partition.
454 :
455 : Returns FD_WKSP_SUCCESS (zero) on success and a FD_WKSP_ERR_*
456 : (negative) on failure (logs details for failure). Reasons for
457 : failure include d is not in [0,part_max) and treap internal
458 : connectivity issues were detected (complete detection not
459 : guaranteed).
460 :
461 : Note that failure reasons are either user error or memory corruption.
462 : This cannot fail in normal operating circumstances. */
463 :
464 : int
465 : fd_wksp_private_free_treap_remove( ulong d,
466 : fd_wksp_t * wksp, /* Assumes current local join */
467 : fd_wksp_private_pinfo_t * pinfo ); /* == fd_wksp_private_pinfo( wksp ) */
468 :
469 : /* private admin APIs *************************************************/
470 :
471 : /* fd_wksp_private_lock locks wksp. Assumes wksp is a current local
472 : join. If wksp is already locked, this will wait for the caller. If
473 : this detects that the caller died while holding the lock, it will try
474 : to steal the lock from the dead caller and cleanup any incomplete
475 : operation the caller was doing. Returns FD_WKSP_SUCCESS (0) if the
476 : lock was acquired or FD_WKSP_ERR_CORRUPT if the lock could not be
477 : obtained because memory corruption was detected while trying to
478 : recover from a dead caller that corrupted the wksp memory. */
479 :
480 : int
481 : fd_wksp_private_lock( fd_wksp_t * wksp );
482 :
483 : /* fd_wksp_private_unlock unlocks a locked wksp. Assumes wksp is a
484 : current local join and the caller has the lock */
485 :
486 : static inline void
487 564333630 : fd_wksp_private_unlock( fd_wksp_t * wksp ) {
488 564333630 : FD_COMPILER_MFENCE();
489 564333630 : FD_VOLATILE( wksp->owner ) = ULONG_MAX;
490 564333630 : FD_COMPILER_MFENCE();
491 564333630 : }
492 :
493 : /* private checkpt/restore APIs ***************************************/
494 :
495 : /* TODO: Consider making these more general (e.g. part of I/O?) */
496 :
497 : /* fd_wksp_private_checkpt writes size sz buffer buf to the output
498 : stream checkpt. Assumes checkpt is valid and not in a prepare.
499 : Returns 0 on success and non-zero on failure (will be an errno compat
500 : error code). */
501 :
502 : static inline int
503 : fd_wksp_private_checkpt_write( fd_io_buffered_ostream_t * checkpt,
504 : void const * buf,
505 21 : ulong sz ) {
506 21 : return fd_io_buffered_ostream_write( checkpt, buf, sz );
507 21 : }
508 :
509 : /* fd_wksp_private_prepare prepares to write at most max bytes to the
510 : output stream checkpt. Assumes checkpt is valid and not in a prepare
511 : and max is at most checkpt's wbuf_sz. Returns the location in the
512 : caller's address space for preparing the max bytes on success (*_err
513 : will be 0) and NULL on failure (*_err will be an errno compat error
514 : code). */
515 :
516 : static inline void *
517 : fd_wksp_private_checkpt_prepare( fd_io_buffered_ostream_t * checkpt,
518 : ulong max,
519 39 : int * _err ) {
520 39 : if( FD_UNLIKELY( fd_io_buffered_ostream_peek_sz( checkpt )<max ) ) {
521 0 : int err = fd_io_buffered_ostream_flush( checkpt );
522 0 : if( FD_UNLIKELY( err ) ) {
523 0 : *_err = err;
524 0 : return NULL;
525 0 : }
526 : /* At this point, peek_sz==wbuf_sz and wbuf_sz>=max */
527 0 : }
528 : /* At this point, peek_sz>=max */
529 39 : *_err = 0;
530 39 : return fd_io_buffered_ostream_peek( checkpt );
531 39 : }
532 :
533 : /* fd_wksp_private_publish publishes prepared bytes [prepare,next) to
534 : checkpt. Assumes checkpt is in a prepare and the number of bytes to
535 : publish is at most the prepare's max. checkpt will not be in a
536 : prepare on return. */
537 :
538 : static inline void
539 : fd_wksp_private_checkpt_publish( fd_io_buffered_ostream_t * checkpt,
540 39 : void * next ) {
541 39 : fd_io_buffered_ostream_seek( checkpt, (ulong)next - (ulong)fd_io_buffered_ostream_peek( checkpt ) );
542 39 : }
543 :
544 : /* fd_wksp_private_cancels a prepare. Assumes checkpt is valid and in a
545 : prepare. checkpt will not be in a prepare on return. */
546 :
547 0 : static inline void fd_wksp_private_checkpt_cancel( fd_io_buffered_ostream_t * checkpt ) { (void)checkpt; }
548 :
549 : /* fd_wksp_private_checkpt_ulong checkpoints the value v into a checkpt.
550 : p points to the location in a prepare where v should be encoded.
551 : Assumes this location has svw_enc_sz(v) available (at least 1 and at
552 : most 9). Returns the location of the first byte after the encoded
553 : value (will be prep+svw_enc_sz(val)). */
554 :
555 270 : static inline void * fd_wksp_private_checkpt_ulong( void * prep, ulong val ) { return fd_ulong_svw_enc( (uchar *)prep, val ); }
556 :
557 : /* fd_wksp_private_checkpt_buf checkpoints a variable length buffer buf
558 : of size sz into a checkpt. p points to the location in a prepare
559 : region where buf should be encoded. Assumes this location has
560 : svw_enc_sz(sz)+sz bytes available (at least 1+sz and at most 9+sz).
561 : Returns the location of the first byte after the encoded buffer (will
562 : be prep+svw_enc_sz(sz)+sz). Zero sz is fine (and NULL buf is fine if
563 : sz is zero). */
564 :
565 : static inline void *
566 : fd_wksp_private_checkpt_buf( void * prep,
567 : void const * buf,
568 81 : ulong sz ) {
569 81 : prep = fd_wksp_private_checkpt_ulong( (uchar *)prep, sz );
570 81 : if( FD_LIKELY( sz ) ) fd_memcpy( prep, buf, sz );
571 81 : return (uchar *)prep + sz;
572 81 : }
573 :
574 : /* fd_wksp_private_restore_ulong restores a ulong from the stream in.
575 : Returns 0 on success and, on return, *_val will contain the restored
576 : val. Returns non-zero on failure (will be an errno compat error
577 : code) and, on failure, *_val will be zero. This will implicitly read
578 : ahead for future restores. */
579 :
580 : int
581 : fd_wksp_private_restore_ulong( fd_io_buffered_istream_t * in,
582 : ulong * _val );
583 :
584 : /* fd_wksp_private_restore_buf restores a variable length buffer buf of
585 : maximum size buf_max from the stream in. Returns 0 on success and,
586 : on success, buf will contain the buffer and *_buf_sz will contain the
587 : buffer's size (will be in [0,buf_max]). Returns non-zero on failure
588 : (will be an errno compat error code) and, on failure, buf will be
589 : clobbered and *_buf_sz will be zero. This will implicitly read ahead
590 : for future restores. Zero buf_max is fine (and NULL buf is fine if
591 : buf_max is zero). */
592 :
593 : int
594 : fd_wksp_private_restore_buf( fd_io_buffered_istream_t * in,
595 : void * buf,
596 : ulong buf_max,
597 : ulong * _buf_sz );
598 :
599 : FD_PROTOTYPES_END
600 :
601 : #endif /* HEADER_fd_src_util_wksp_fd_wksp_private_h */
|