#ifndef HEADER_fd_src_tango_dcache_fd_dcache_h
#define HEADER_fd_src_tango_dcache_fd_dcache_h

#include "../fd_tango_base.h"

/* FD_DCACHE_{ALIGN,FOOTPRINT} specify the alignment and footprint
   needed for a dcache with a data region of data_sz bytes and an
   application region of app_sz bytes.  ALIGN is at least FD_CHUNK_ALIGN
   and recommended to be at least double cache line to mitigate various
   kinds of false sharing.  data_sz and app_sz are assumed to be valid
   (e.g. will not require a footprint larger than ULONG_MAX).  These are
   provided to facilitate compile time dcache declarations. */
13 :
14 6048003 : #define FD_DCACHE_ALIGN (4096UL)
15 : #define FD_DCACHE_FOOTPRINT( data_sz, app_sz ) \
16 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
17 : FD_DCACHE_SLOT_ALIGN, 128UL ), /* hdr */ \
18 : FD_DCACHE_SLOT_ALIGN, FD_DCACHE_GUARD_FOOTPRINT ), /* guard */ \
19 : FD_DCACHE_ALIGN, (data_sz) ), /* data */ \
20 : FD_DCACHE_ALIGN, (app_sz) ), /* app */ \
21 : FD_DCACHE_ALIGN )
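
/* For example, backing memory for a dcache can be declared at compile
   time.  A minimal sketch (my_dcache_mem, MY_DATA_SZ and MY_APP_SZ are
   hypothetical names, not part of this API):

     #define MY_DATA_SZ (1048576UL)
     #define MY_APP_SZ  (0UL)

     static uchar __attribute__((aligned(FD_DCACHE_ALIGN)))
       my_dcache_mem[ FD_DCACHE_FOOTPRINT( MY_DATA_SZ, MY_APP_SZ ) ];

   my_dcache_mem could then be passed as shmem to fd_dcache_new
   below. */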

/* FD_DCACHE_GUARD_FOOTPRINT specifies the footprint of the guard
   region immediately before the dcache data region.  The guard region
   footprint is FD_DCACHE_SLOT_ALIGN aligned and a FD_DCACHE_SLOT_ALIGN
   multiple.  It provides flexibility (up to the magnitude of the
   footprint) to align how a producer might write directly into a
   dcache such that the frag payload alignment a consumer sees is
   consistent regardless of the details of the underlying producer. */

#define FD_DCACHE_GUARD_FOOTPRINT (3968UL)

/* FD_DCACHE_SLOT_FOOTPRINT returns the footprint of a
   FD_DCACHE_SLOT_ALIGN aligned slot sufficient to hold a frag payload
   of up to mtu bytes.  Returns 0 if mtu is not valid (i.e. so large
   that the required slot size is larger than ULONG_MAX). */

#define FD_DCACHE_SLOT_ALIGN (128UL)
#define FD_DCACHE_SLOT_FOOTPRINT( mtu ) FD_ULONG_ALIGN_UP( (mtu), FD_DCACHE_SLOT_ALIGN )

/* FD_DCACHE_REQ_DATA_SZ returns the size of a data region in bytes
   sufficient for a dcache whose producer writes frag payloads up to
   mtu (should be positive) bytes in size, that can have up to depth
   (should be positive) frag payloads visible to consumers while the
   producer can be concurrently preparing up to burst (should be
   positive) frag payloads.  Assumes mtu, depth, burst and compact are
   valid and payload footprints are rounded up to at most a
   FD_DCACHE_ALIGN multiple when written by a producer.  Note that
   payloads written by a producer will generally be at least
   FD_DCACHE_ALIGN aligned to facilitate interoperability with
   fd_frag_meta_t chunk indexing.  Also note that for a compactly
   stored ring, it is usually not useful to use a burst larger than 1
   (but not particularly harmful beyond resulting in a data region
   larger than necessary ... it might be used to quasi-batch publish
   frags). */

#define FD_DCACHE_REQ_DATA_SZ( mtu, depth, burst, compact ) (FD_DCACHE_SLOT_FOOTPRINT( mtu )*((depth)+(burst)+(ulong)!!(compact)))
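
/* For example, a worked sizing with illustrative parameters: for
   mtu 1472, depth 128, burst 1 and a compactly stored ring,
   FD_DCACHE_SLOT_FOOTPRINT( 1472 ) rounds 1472 up to a multiple of
   FD_DCACHE_SLOT_ALIGN (128), giving 1536 byte slots, such that:

     FD_DCACHE_REQ_DATA_SZ( 1472UL, 128UL, 1UL, 1 )
       == 1536UL*(128UL + 1UL + 1UL)
       == 199680UL */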

FD_PROTOTYPES_BEGIN

/* Construction API */

/* fd_dcache_req_data_sz is the same as FD_DCACHE_REQ_DATA_SZ but does
   not assume valid arguments.  Returns the required data_sz on success
   or 0 on failure.  Reasons for failure include zero mtu, too large
   mtu, zero depth, zero burst or the required data_sz would be larger
   than ULONG_MAX. */

FD_FN_CONST ulong
fd_dcache_req_data_sz( ulong mtu,
                       ulong depth,
                       ulong burst,
                       int   compact );

/* fd_dcache_{align,footprint} return the required alignment and
   footprint of a memory region suitable for use as a dcache with a
   data region of data_sz bytes and an application region of app_sz
   bytes.  align returns FD_DCACHE_ALIGN.  If data_sz or app_sz are
   invalid (e.g. the required footprint is larger than ULONG_MAX),
   footprint will silently return 0 (and thus can be used by the caller
   to validate dcache configuration parameters).  Zero is valid for
   data_sz and/or app_sz. */

FD_FN_CONST ulong
fd_dcache_align( void );

FD_FN_CONST ulong
fd_dcache_footprint( ulong data_sz,
                     ulong app_sz );
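
/* E.g. a configuration validation sketch (mtu, depth, burst, compact
   and app_sz here are assumed to come from the caller's config):

     ulong data_sz = fd_dcache_req_data_sz( mtu, depth, burst, compact );
     if( FD_UNLIKELY( !data_sz ) ) { ... bad mtu, depth or burst ... }

     ulong footprint = fd_dcache_footprint( data_sz, app_sz );
     if( FD_UNLIKELY( !footprint ) ) { ... bad data_sz or app_sz ... } */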

/* fd_dcache_new formats an unused memory region for use as a dcache.
   shmem is a non-NULL pointer to this region in the local address
   space with the required footprint and alignment.  The size of the
   dcache data region is data_sz bytes and the size of the application
   region is app_sz bytes.  Zero is valid for data_sz and/or app_sz.

   Returns shmem (and the memory region it points to will be formatted
   as a dcache with the application region initialized to zero, caller
   is not joined) on success and NULL on failure (logs details).
   Reasons for failure include an obviously bad shmem, bad data_sz or
   bad app_sz. */

void *
fd_dcache_new( void * shmem,
               ulong  data_sz,
               ulong  app_sz );

/* fd_dcache_join joins the caller to the dcache.  shdcache points to
   the first byte of the memory region backing the dcache in the
   caller's address space.

   Returns a pointer in the local address space to the dcache's data
   region on success (IMPORTANT! THIS IS NOT JUST A CAST OF SHDCACHE)
   and NULL on failure (logs details).  Reasons for failure include
   shdcache obviously not pointing to a memory region holding a dcache.
   Every successful join should have a matching leave.  The lifetime of
   the join is until the matching leave or the thread group is
   terminated.

   This region will have a guard region of FD_DCACHE_GUARD_FOOTPRINT
   bytes just before it and data_sz bytes available after it. */

uchar *
fd_dcache_join( void * shdcache );
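
/* E.g. a typical lifecycle sketch, assuming shmem is a FD_DCACHE_ALIGN
   aligned region of at least fd_dcache_footprint( data_sz, app_sz )
   bytes (e.g. a wksp allocation):

     void  * shdcache = fd_dcache_new ( shmem, data_sz, app_sz );
     uchar * dcache   = fd_dcache_join( shdcache );

     ... producer operates on dcache here ...

     fd_dcache_delete( fd_dcache_leave( dcache ) ); */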

/* fd_dcache_leave leaves a current local join.  Returns a pointer to
   the underlying shared memory region on success (IMPORTANT! THIS IS
   NOT JUST A CAST OF DCACHE) and NULL on failure (logs details).
   Reasons for failure include dcache is NULL. */

void *
fd_dcache_leave( uchar const * dcache );

/* fd_dcache_delete unformats a memory region used as a dcache.
   Assumes nobody is joined to the region.  Returns a pointer to the
   underlying shared memory region or NULL if used obviously in error
   (e.g. shdcache is obviously not a dcache ... logs details).  The
   ownership of the memory region is transferred to the caller. */

void *
fd_dcache_delete( void * shdcache );

/* Accessor API */

/* fd_dcache_{data_sz,app_sz} return the sizes of the {data,app}
   regions.  Assumes dcache is a current local join. */

FD_FN_PURE ulong fd_dcache_data_sz( uchar const * dcache );
FD_FN_PURE ulong fd_dcache_app_sz ( uchar const * dcache );

/* fd_dcache_app_laddr returns the location in the caller's local
   address space of memory set aside for application specific usage.
   Assumes dcache is a current local join.  The lifetime of the
   returned pointer is the same as the underlying join.  This region
   has FD_DCACHE_ALIGN alignment (at least double cache line) and is
   fd_dcache_app_sz( dcache ) in size.  laddr_const is a const-correct
   version. */

FD_FN_PURE uchar const * fd_dcache_app_laddr_const( uchar const * dcache );
FD_FN_PURE uchar *       fd_dcache_app_laddr      ( uchar *       dcache );
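
/* E.g. a sketch of stashing producer specific state in the application
   region (my_state_t is a hypothetical caller defined type; the caller
   is responsible for it fitting in the app region):

     if( FD_UNLIKELY( fd_dcache_app_sz( dcache )<sizeof(my_state_t) ) ) {
       ... app region too small ...
     }
     my_state_t * state = (my_state_t *)fd_dcache_app_laddr( dcache ); */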

/* fd_dcache_compact_is_safe returns whether the dcache can safely
   store frags compactly, quasi ring like, as described in
   fd_dcache_compact_next below.

   Chunks are indexed relative to base (e.g. the wksp containing the
   dcache, to facilitate multiple dcaches written by multiple producers
   concurrently in the same wksp using a common chunk indexing scheme
   at consumers ... base==dcache is fine and implies chunks in this
   dcache region will be indexed starting from zero).

   base and dcache should be double chunk aligned, dcache should be a
   current local join, base and dcache should be relatively spaced
   identically between different thread groups that might use the chunk
   indices and sufficiently close in the local address space that all
   data region chunk addresses can be losslessly compressed and shared
   via a 32-bit fd_frag_meta_t chunk field.

   mtu is the maximum frag size that a producer might write into this
   dcache.  It is assumed that the producer will round up the
   footprints of frags written into the dcache to double chunk aligned
   boundaries.

   depth is the maximum number of frags in this dcache that might be
   concurrently visible to consumers.

   Returns 1 if the dcache is safe and 0 if not (with details logged). */

int
fd_dcache_compact_is_safe( void const * base,
                           void const * dcache,
                           ulong        mtu,
                           ulong        depth );
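
/* E.g. a producer boot time check sketch.  A common choice of base is
   the wksp containing the dcache (mtu and depth here are assumed to
   come from the producer's config):

     void * base = (void *)fd_wksp_containing( dcache );
     if( FD_UNLIKELY( !fd_dcache_compact_is_safe( base, dcache, mtu, depth ) ) ) {
       ... dcache cannot safely back a depth deep compact ring at this mtu ...
     } */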

/* fd_dcache_compact_{chunk0,chunk1,wmark} return the range of chunk
   indices [chunk0,chunk1) that, relative to the base address, are
   covered by the dcache's data region, and the watermark chunk index
   for use by fd_dcache_compact_next below.
   0<=chunk0<=wmark<=chunk1<=UINT_MAX.  These assume dcache is a
   current local join and that base / dcache pass
   fd_dcache_compact_is_safe above. */

FD_FN_CONST static inline ulong
fd_dcache_compact_chunk0( void const * base,
                          void const * dcache ) {
  return ((ulong)dcache - (ulong)base) >> FD_CHUNK_LG_SZ;
}

FD_FN_PURE static inline ulong
fd_dcache_compact_chunk1( void const * base,
                          void const * dcache ) {
  return ((ulong)dcache + fd_dcache_data_sz( (uchar const *)dcache ) - (ulong)base) >> FD_CHUNK_LG_SZ;
}

FD_FN_PURE static inline ulong
fd_dcache_compact_wmark( void const * base,
                         void const * dcache,
                         ulong        mtu ) {
  ulong chunk_mtu = ((mtu + 2UL*FD_CHUNK_SZ-1UL) >> (1+FD_CHUNK_LG_SZ)) << 1; /* Chunks in a worst case frag footprint */
  return fd_dcache_compact_chunk1( base, dcache ) - chunk_mtu;
}

/* fd_dcache_compact_next:

   Let a dcache have space for at least chunk_mtu*(depth+2)-1 chunks,
   where chunks are indexed [chunk0,chunk1) and chunk_mtu is a
   sufficient number of chunks to hold the worst case frag size.
   Further, let the dcache's producer write frags into the dcache at
   chunk aligned positions with a footprint of at most chunk_mtu chunks
   (with one exception noted below).  Lastly, let the producer write
   frags contiguously into the dcache such that consumers do not need
   to do any special handling for frags that wrap around the end of the
   dcache.

   Since the producer does not necessarily know the size of a frag as
   it is producing it but does know a priori the maximum size of a frag
   it might produce, the producer can achieve this by making the first
   chunk of any frag it writes be in:

     [chunk0,wmark]

   where:

     wmark = chunk1 - chunk_mtu

   This is equivalent to saying that, if there are at least chunk_mtu
   chunks until the end of the dcache after a frag, that frag's
   footprint will be enough contiguous chunks to cover the frag (up to
   chunk_mtu).  But if there are fewer than chunk_mtu chunks, that
   frag's footprint will extend to the end of the dcache.

   This implies that, in the worst case, there are at least depth+1
   frags with a chunk_mtu footprint (those not near the end) and 1 frag
   with a 2*chunk_mtu-1 footprint (the one frag nearest the dcache end)
   in the dcache.  depth of these are exposed to consumers and 1 is in
   preparation by the producer.  It also implies that the set of chunks
   in the dcache in use is cyclically contiguous, starting from the
   oldest consumer exposed frag until the currently exposed frag.

   Note that the act of publishing the in-preparation frag also
   unpublishes the oldest exposed frag.  Given the above, this
   guarantees that there is at least chunk_mtu contiguous space
   available for use by the next frag so long as chunk_mtu is large
   enough to cover the worst case frag and the dcache has room for at
   least chunk_mtu*(depth+2)-1 chunks. */

FD_FN_CONST static inline ulong         /* Will be in [chunk0,wmark] */
fd_dcache_compact_next( ulong chunk,    /* Assumed in [chunk0,wmark] */
                        ulong sz,       /* Assumed in [0,mtu] */
                        ulong chunk0,   /* From fd_dcache_compact_chunk0 */
                        ulong wmark ) { /* From fd_dcache_compact_wmark */
  chunk += ((sz+(2UL*FD_CHUNK_SZ-1UL)) >> (1+FD_CHUNK_LG_SZ)) << 1; /* Advance to the next chunk pair, no overflow if init passed */
  return fd_ulong_if( chunk>wmark, chunk0, chunk );                 /* If that goes past the high water mark, wrap back to chunk0 */
}
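
/* E.g. a compact ring producer loop sketch using the above (base,
   dcache, mtu and the frag writing / publishing steps are assumptions
   of this sketch):

     ulong chunk0 = fd_dcache_compact_chunk0( base, dcache );
     ulong wmark  = fd_dcache_compact_wmark ( base, dcache, mtu );
     ulong chunk  = chunk0;

     for(;;) {
       uchar * frag = (uchar *)fd_chunk_to_laddr( base, chunk );
       ulong   sz   = ... write up to mtu payload bytes at frag ...;
       ... publish (chunk,sz) to consumers (e.g. via an mcache) ...
       chunk = fd_dcache_compact_next( chunk, sz, chunk0, wmark );
     } */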

FD_PROTOTYPES_END

#endif /* HEADER_fd_src_tango_dcache_fd_dcache_h */