Line data Source code
1 : #ifndef HEADER_fd_src_tango_dcache_fd_dcache_h
2 : #define HEADER_fd_src_tango_dcache_fd_dcache_h
3 :
4 : #include "../fd_tango_base.h"
5 :
6 : /* FD_DCACHE_{ALIGN,FOOTPRINT} specify the alignment and footprint
7 : needed for a dcache with a data region of data_sz bytes and an
8 : application region of app_sz bytes. ALIGN is at least FD_CHUNK_ALIGN
9 : and recommended to be at least double cache line to mitigate various
10 : kinds of false sharing. data_sz and app_sz are assumed to be valid
11 : (e.g. will not require a footprint larger than ULONG_MAX). These are
12 : provided to facilitate compile time dcache declarations. */
13 :
14 6037266 : #define FD_DCACHE_ALIGN (128UL) /* Double cache line; alignment used for every region in FD_DCACHE_FOOTPRINT */
15 : #define FD_DCACHE_FOOTPRINT( data_sz, app_sz ) \
16 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
17 : FD_DCACHE_ALIGN, 128UL ), /* header, 128 bytes */ \
18 : FD_DCACHE_ALIGN, FD_DCACHE_GUARD_FOOTPRINT ), /* guard region, immediately before the data region */ \
19 : FD_DCACHE_ALIGN, (data_sz) ), /* data region, data_sz bytes */ \
20 : FD_DCACHE_ALIGN, (app_sz) ), /* application region, app_sz bytes */ \
21 : FD_DCACHE_ALIGN )
22 :
23 : /* FD_DCACHE_GUARD_FOOTPRINT specifies the footprint in bytes of the
24 : guard region immediately before the dcache data region. The guard
25 : region footprint is FD_DCACHE_ALIGN aligned and a FD_DCACHE_ALIGN multiple.
26 : It provides flexibility (up to the magnitude of the footprint) to
27 : align how a producer might write directly into a dcache such that the
28 : frag payload alignment a consumer sees is consistent regardless of
29 : the details of the underlying producer. */
30 :
31 12 : #define FD_DCACHE_GUARD_FOOTPRINT (128UL)
32 :
33 : /* FD_DCACHE_SLOT_FOOTPRINT returns the footprint in bytes of a
34 : FD_DCACHE_ALIGN aligned slot sufficient to hold a frag payload of up
35 : to mtu bytes. Returns 0 if mtu is not valid (i.e. so large that the
36 : required slot size is larger than ULONG_MAX). */
37 :
38 2878419 : #define FD_DCACHE_SLOT_FOOTPRINT( mtu ) FD_ULONG_ALIGN_UP( (mtu), FD_DCACHE_ALIGN )
39 :
40 : /* FD_DCACHE_REQ_DATA_SZ returns the size of a data region in bytes
41 : sufficient for a dcache whose producer writes frag payloads up to mtu
42 : (should be positive) bytes in size, that can have up to depth (should
43 : be positive) frag payloads visible to consumers while the producer
44 : can be concurrently preparing up to burst (should be positive) frag
45 : payloads. A non-zero compact adds one more slot. Assumes mtu, depth,
46 : burst and compact are valid and payload footprints are rounded up to
47 : at most a FD_DCACHE_ALIGN multiple when written by a producer. Note
48 : that payloads written by a producer will generally be at least
49 : FD_DCACHE_ALIGN aligned to facilitate interoperability with
50 : fd_frag_meta_t chunk indexing. Also note that for a compactly stored
51 : ring, it is usually not useful to use a burst larger than 1 (but not
52 : particularly harmful outside resulting in a data region larger than
53 : necessary ... might use it to quasi-batch publish frags). */
54 :
55 : #define FD_DCACHE_REQ_DATA_SZ( mtu, depth, burst, compact ) (FD_DCACHE_SLOT_FOOTPRINT( mtu )*((depth)+(burst)+(ulong)!!(compact)))
56 :
57 : FD_PROTOTYPES_BEGIN
58 :
59 : /* Construction API */
60 :
61 : /* fd_dcache_req_data_sz is the same as FD_DCACHE_REQ_DATA_SZ but does
62 : not assume valid arguments. Returns sz on success or 0 on failure.
63 : Reasons for failure include zero mtu, too large mtu, zero depth, zero
64 : burst or the required data_sz would be larger than ULONG_MAX. */
65 :
66 : FD_FN_CONST ulong
67 : fd_dcache_req_data_sz( ulong mtu, /* Max frag payload size in bytes, zero or too large fails */
68 : ulong depth, /* Max frag payloads visible to consumers, zero fails */
69 : ulong burst, /* Max frag payloads the producer prepares concurrently, zero fails */
70 : int compact ); /* Same meaning as the compact argument of FD_DCACHE_REQ_DATA_SZ */
71 :
72 : /* fd_dcache_{align,footprint} return the required alignment and
73 : footprint of a memory region suitable for use as dcache with a data
74 : region of data_sz bytes and an application region of app_sz bytes.
75 : align returns FD_DCACHE_ALIGN. If data_sz or app_sz are invalid
76 : (e.g. the required footprint is larger than a ULONG_MAX), footprint
77 : will silently return 0 (and thus can be used by the caller to
78 : validate dcache configuration parameters). Zero is valid for data_sz
79 : and/or app_sz. */
80 :
81 : FD_FN_CONST ulong
82 : fd_dcache_align( void ); /* Returns FD_DCACHE_ALIGN */
83 :
84 : FD_FN_CONST ulong
85 : fd_dcache_footprint( ulong data_sz, /* Data region size in bytes, zero is valid */
86 : ulong app_sz ); /* Application region size in bytes, zero is valid */
87 :
88 : /* fd_dcache_new formats an unused memory region for use as a dcache.
89 : shmem is a non-NULL pointer to this region in the local address space
90 : with the required footprint and alignment. The size of the dcache
91 : data region is data_sz bytes and the size of the application
92 : region is app_sz bytes. Zero is valid for data_sz and/or app_sz.
93 :
94 : Returns shmem (and the memory region it points to will be formatted
95 : as a dcache with the data and application regions initialized to
96 : zero, caller is not joined) on success and NULL on failure (logs
97 : details). Reasons for failure include obviously bad shmem, bad
98 : data_sz or bad app_sz. */
99 :
100 : void *
101 : fd_dcache_new( void * shmem,
102 : ulong data_sz,
103 : ulong app_sz );
104 :
105 : /* fd_dcache_join joins the caller to the dcache. shdcache points to
106 : the first byte of the memory region backing the dcache in the
107 : caller's address space.
108 :
109 : Returns a pointer in the local address space to the dcache's data
110 : region on success (IMPORTANT! THIS IS NOT JUST A CAST OF SHDCACHE)
111 : and NULL on failure (logs details). Reasons for failure include that
112 : shdcache is obviously not a pointer to a memory region holding a
113 : dcache. Every successful join should have a matching leave. The
114 : lifetime of the join is until the matching leave or the thread group
115 : is terminated.
116 :
117 : The data region will have a guard region of FD_DCACHE_GUARD_FOOTPRINT
118 : bytes just before it and is data_sz bytes in size. */
119 :
120 : uchar *
121 : fd_dcache_join( void * shdcache );
122 :
123 : /* fd_dcache_leave leaves a current local join. Returns a pointer to
124 : the underlying shared memory region on success (IMPORTANT! THIS IS
125 : NOT JUST A CAST OF DCACHE) and NULL on failure (logs details).
126 : Reasons for failure include dcache is NULL. */
127 :
128 : void *
129 : fd_dcache_leave( uchar const * dcache ); /* dcache is a current local join; NULL fails */
130 :
131 : /* fd_dcache_delete unformats a memory region used as a dcache. Assumes
132 : nobody is joined to the region. Returns a pointer to the underlying
133 : shared memory region or NULL if used obviously in error (e.g.
134 : shdcache is obviously not a dcache ... logs details). The ownership
135 : of the memory region is transferred to the caller. */
136 :
137 : void *
138 : fd_dcache_delete( void * shdcache ); /* shdcache points to the memory region used as a dcache */
139 :
140 : /* Accessor API */
141 :
142 : /* fd_dcache_{data_sz,app_sz} return the sizes of the {data,app}
143 : regions. Assumes dcache is a current local join. */
144 :
145 : FD_FN_PURE ulong fd_dcache_data_sz( uchar const * dcache ); /* Size of the data region */
146 : FD_FN_PURE ulong fd_dcache_app_sz ( uchar const * dcache ); /* Size of the application region */
147 :
148 : /* fd_dcache_app_laddr returns the location in the caller's local address
149 : space of memory set aside for application specific usage. Assumes
150 : dcache is a current local join. The lifetime of the returned pointer
151 : is the same as the underlying join. This region has FD_DCACHE_ALIGN
152 : alignment (double cache line) and is fd_dcache_app_sz( dcache ) in
153 : size. laddr_const is a const-correct version. */
154 :
155 : FD_FN_PURE uchar const * fd_dcache_app_laddr_const( uchar const * dcache );
156 : FD_FN_PURE uchar * fd_dcache_app_laddr ( uchar * dcache );
157 :
158 : /* fd_dcache_compact_is_safe returns whether the dcache can safely store
159 : frags compactly in a quasi ring like manner as described in
160 : fd_dcache_compact_next below.
161 :
162 : Chunks are indexed relative to base (e.g. the wksp containing the
163 : dcache to facilitate multiple dcaches written by multiple producers
164 : concurrently in the same wksp using a common chunk indexing scheme at
165 : consumers ... base==dcache is fine and implies chunks in this dcache
166 : region will be indexed starting from zero).
167 :
168 : base and dcache should be double chunk aligned, dcache should be a
169 : current local join, base and dcache should be relatively spaced
170 : identically between different thread groups that might use the chunk
171 : indices and sufficiently close in the local address space that all
172 : data region chunk addresses can be losslessly compressed and
173 : shared via a 32-bit fd_frag_meta_t chunk field.
174 :
175 : mtu is the maximum frag size that a producer might write into this
176 : dcache. It is assumed that the producer will round up the footprint
177 : of frags written into the dcache to double chunk aligned boundaries.
178 :
179 : depth is the maximum number of frags in this dcache that consumers
180 : might be concurrently accessing.
181 :
182 : Returns 1 if the dcache is safe and 0 if not (with details logged). */
183 :
184 : int
185 : fd_dcache_compact_is_safe( void const * base,
186 : void const * dcache,
187 : ulong mtu,
188 : ulong depth );
189 :
190 : /* fd_dcache_compact_{chunk0,chunk1,wmark} return the range of chunk
191 : indices [chunk0,chunk1), relative to the base address, covered by the
192 : dcache's data region and the watermark chunk index for use by
193 : fd_dcache_compact_next below.
194 : 0<=chunk0<=wmark<=chunk1<=UINT_MAX. These assume dcache is a current
195 : local join and the base / dcache pass fd_dcache_compact_is_safe
196 : above. */
197 :
198 : FD_FN_CONST static inline ulong
199 : fd_dcache_compact_chunk0( void const * base, /* Address that chunk indices are relative to */
200 2148 : void const * dcache ) { /* Assumed to be a current local join */
201 2148 : return ((ulong)dcache - (ulong)base) >> FD_CHUNK_LG_SZ; /* Byte offset of dcache from base, shifted down to a chunk index */
202 2148 : }
203 :
204 : FD_FN_PURE static inline ulong
205 : fd_dcache_compact_chunk1( void const * base, /* Address that chunk indices are relative to */
206 4284 : void const * dcache ) { /* Assumed to be a current local join */
207 4284 : return ((ulong)dcache + fd_dcache_data_sz( (uchar const *)dcache ) - (ulong)base) >> FD_CHUNK_LG_SZ; /* Byte offset of the data region end from base, shifted down to a chunk index */
208 4284 : }
209 :
210 : FD_FN_PURE static inline ulong
211 : fd_dcache_compact_wmark( void const * base, /* Address that chunk indices are relative to */
212 : void const * dcache, /* Assumed to be a current local join */
213 2145 : ulong mtu ) { /* Maximum frag size in bytes */
214 2145 : ulong chunk_mtu = ((mtu + 2UL*FD_CHUNK_SZ-1UL) >> (1+FD_CHUNK_LG_SZ)) << 1; /* mtu in chunks, rounded up to an even count (assumes FD_CHUNK_SZ==1UL<<FD_CHUNK_LG_SZ, TODO confirm) */
215 2145 : return fd_dcache_compact_chunk1( base, dcache ) - chunk_mtu; /* wmark = chunk1 - chunk_mtu */
216 2145 : }
217 :
218 : /* fd_dcache_compact_next:
219 :
220 : Let a dcache have space for at least chunk_mtu*(depth+2)-1 chunks
221 : where chunks are indexed [chunk0,chunk1) and chunk_mtu is a
222 : sufficient number of chunks to hold the worst case frag size.
223 : Further, let the dcache's producer write frags into the dcache at
224 : chunk aligned positions with a footprint of at most chunk_mtu chunks
225 : (with one exception noted below). Lastly, let the producer write
226 : frags contiguously into the dcache such that consumers do not need to
227 : do any special handling for frags that wrap around the end of the
228 : dcache.
229 :
230 : Since the producer does not necessarily know the size of a frag as it
231 : is producing it but does know a priori the maximum size of a frag it
232 : might produce, the producer can achieve this by making the first
233 : chunk of any frag it writes in:
234 :
235 : [chunk0,wmark]
236 :
237 : where:
238 :
239 : wmark = chunk1 - chunk_mtu
240 :
241 : This is equivalent to saying that, if there are at least chunk_mtu
242 : chunks until the end of a dcache after a frag, that frag's footprint
243 : will be enough contiguous chunks to cover the frag (up to chunk_mtu).
244 : But if there are less than chunk_mtu chunks, that frag's footprint
245 : will be until the end of the dcache.
246 :
247 : This implies, in the worst case, there are at least depth+1 chunk_mtu
248 : footprint frags (those not near the end) and 1 frag with a
249 : 2*chunk_mtu-1 footprint (the one frag nearest the dcache end) in the
250 : dcache. depth of these are exposed to consumers and 1 in preparation
251 : by the producer. It also implies that the set of chunks in the
252 : dcache in use is cyclically contiguous starting from the oldest
253 : consumer exposed frag until the currently exposed frag.
254 :
255 : Note that the act of publishing the in-preparation frag also
256 : unpublishes the oldest exposed frag. Given the above, this
257 : guarantees that there is at least chunk_mtu contiguous space
258 : available for use by the next frag so long as chunk_mtu is large
259 : enough to cover the worst case frag and the dcache has room at least
260 : for chunk_mtu*(depth+2)-1 chunks. */
261 :
262 : FD_FN_CONST static inline ulong /* Will be in [chunk0,wmark] */
263 : fd_dcache_compact_next( ulong chunk, /* Chunk index of the current frag, assumed in [chunk0,wmark] */
264 : ulong sz, /* Size of the current frag in bytes, assumed in [0,mtu] */
265 : ulong chunk0, /* From fd_dcache_compact_chunk0 */
266 77099484 : ulong wmark ) { /* From fd_dcache_compact_wmark */
267 77099484 : chunk += ((sz+(2UL*FD_CHUNK_SZ-1UL)) >> (1+FD_CHUNK_LG_SZ)) << 1; /* Advance to next chunk pair, no overflow if init passed */
268 77099484 : return fd_ulong_if( chunk>wmark, chunk0, chunk ); /* If that goes over the high water mark, wrap to chunk0 (not necessarily zero) */
269 77099484 : }
270 :
271 : FD_PROTOTYPES_END
272 :
273 : #endif /* HEADER_fd_src_tango_dcache_fd_dcache_h */
274 :
|