Line data Source code
1 : #ifndef HEADER_fd_src_tango_fctl_fd_fctl_h
2 : #define HEADER_fd_src_tango_fctl_fd_fctl_h
3 :
4 : /* fctl provides a set of APIs for general purpose, ultra flexible,
5 : ultra low overhead credit-based flow control. That being said,
6 : backpressure is the worst thing in the world for a large scale
7 : distributed system. So the system should be designed to use flow
8 : control exceedingly sparingly and limit the number of strictly
9 : reliable consumers needed in the system to, ideally, zero. That is,
10 : the less this API gets used, the better the overall system is. */
11 :
12 : #include "../fd_tango_base.h"
13 :
14 : /* FD_FCTL_RX_MAX_MAX returns the largest number of receivers a fctl can
15 : be sized to accommodate. Should be in [1,65535]. */
16 :
17 : #define FD_FCTL_RX_MAX_MAX (65535UL)
18 :
19 : /* FD_FCTL_{ALIGN,FOOTPRINT} specify the alignment and footprint needed
20 : for a fctl. ALIGN will be positive integer power of 2. FOOTPRINT
21 : assumes rx_max is in [0,FD_FCTL_RX_MAX_RX_MAX]. */
22 :
23 0 : #define FD_FCTL_ALIGN (8UL)
24 : #define FD_FCTL_FOOTPRINT( rx_max ) \
25 9 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
26 9 : alignof(fd_fctl_t), sizeof(fd_fctl_t) ), \
27 9 : alignof(fd_fctl_private_rx_t), (rx_max)*sizeof(fd_fctl_private_rx_t) ), \
28 9 : FD_FCTL_ALIGN )
29 :
30 : /* A fd_fctl_t is an opaque handle to an flow control object that can
31 : manage flow control on behalf of a transmitter for a zero or more
32 : (potentially dynamic) reliable (i.e. backpressure allowed) receivers. */
33 :
34 : struct fd_fctl_private;
35 : typedef struct fd_fctl_private fd_fctl_t;
36 :
37 : /* Private APIs *******************************************************/
38 :
39 : /* For the most part, applications should not interacting with these
40 : directly. They are exposed to facilitate compile time inlining of
41 : flow control operations in performance critical loops. */
42 :
43 : struct fd_fctl_private_rx {
44 : long cr_max; /* See fd_fctl_cfg_rx_add for details, should be positive */
45 : ulong const * seq_laddr; /* ", NULL indicates an inactive rx */
46 : ulong * slow_laddr; /* " */
47 : };
48 :
49 : typedef struct fd_fctl_private_rx fd_fctl_private_rx_t;
50 :
51 : struct fd_fctl_private {
52 : ushort rx_max; /* Maximum number of receivers for this fctl, in [0,FD_FCTL_RX_MAX_MAX] */
53 : ushort rx_cnt; /* Current number of receivers for this fctl, in [0,rx_max] */
54 : int in_refill; /* 0 / 1 if the flow control currently in a refilling state */
55 : ulong cr_burst; /* See fd_fctl_cfg_done for details, in [1,LONG_MAX] (not ULONG_MAX) */
56 : ulong cr_max; /* ", in [cr_burst,LONG_MAX] */
57 : ulong cr_resume; /* ", in [cr_burst,cr_max ] */
58 : ulong cr_refill; /* ", In [1,cr_resume ] */
59 : /* rx_max fd_fctl_private_rx_t array indexed [0,rx_max) follows. Only
60 : elements [0,rx_cnt) are in use. Only elements with non-NULL
61 : seq_laddr are currently allowed to backpressure this fctl. */
62 : };
63 :
64 : FD_PROTOTYPES_BEGIN
65 :
66 : /* fd_fctl_rx returns a pointer to the fctl's rx array. Assumes fctl is
67 : valid. fd_fctl_rx_const is a const correct version. */
68 :
69 : FD_FN_CONST static inline fd_fctl_private_rx_t *
70 1154 : fd_fctl_private_rx( fd_fctl_t * fctl ) {
71 1154 : return (fd_fctl_private_rx_t *)(fctl+1UL);
72 1154 : }
73 :
74 : FD_FN_CONST static inline fd_fctl_private_rx_t const *
75 31268 : fd_fctl_private_rx_const( fd_fctl_t const * fctl ) {
76 31268 : return (fd_fctl_private_rx_t const *)(fctl+1UL);
77 31268 : }
78 :
79 : FD_PROTOTYPES_END
80 :
81 : /* Public APIs ********************************************************/
82 :
83 : FD_PROTOTYPES_BEGIN
84 :
85 : /* Constructor APIs */
86 :
87 : /* fd_fctl_{align,footprint} return the required alignment and footprint
88 : of a memory region suitable for use as fctl that can manage flow control
89 : for up to rx_max reliable consumers. align returns FD_FCTL_ALIGN
90 : (will be a power of two). rx_max should be in [0,FCTL_RX_MAX_MAX].
91 : If not, footprint will silently return 0 (and thus can be used by the
92 : caller to validate rx_max configuration parameters). */
93 :
94 : FD_FN_CONST static inline ulong
95 0 : fd_fctl_align( void ) {
96 0 : return FD_FCTL_ALIGN;
97 0 : }
98 :
99 : FD_FN_CONST static inline ulong
100 12 : fd_fctl_footprint( ulong rx_max ) {
101 12 : if( FD_UNLIKELY( rx_max>FD_FCTL_RX_MAX_MAX ) ) return 0UL;
102 9 : return FD_FCTL_FOOTPRINT( rx_max );
103 12 : }
104 :
105 : /* fd_fctl_new takes ownership of the memory region pointed to by shmem
106 : (which is assumed to be non-NULL with the appropriate alignment and
107 : footprint) and formats it as fctl. Typically this memory region will
108 : just be local to the user though it can be placed in a shared region
109 : to allow remote monitors to inspect / modify various portions of it,
110 : facilitate hotswapping, etc. Returns mem on success and NULL on
111 : failure (logs details). The fctl will be initialized to an
112 : unconfigured state with zero receivers attached to it on success
113 : return. Reasons for failure include an obviously bad shmem region
114 : and too large rx_max.
115 :
116 : fd_fctl_join joins the caller to a memory region holding the state of
117 : a fctl. shfctl points to a memory region in the local address space
118 : that holds a fctl. Returns an opaque handle of the local join in the
119 : local address space to the fctl (which might not be the same thing as
120 : shfctl ... the separation of new and join is to facilitate
121 : interprocess shared memory usage patterns while supporting
122 : transparent upgrade of users of this to more elaborate algorithms
123 : where address translations under the hood may not be trivial).
124 :
125 : fd_fctl_leave leaves the current fctl join. Returns a pointer in the
126 : local address space to the memory region holding the state of the
127 : fctl. The join should not be used afterward.
128 :
129 : fd_fctl_delete unformats the memory region currently used to hold the
130 : state of a fctl and returns ownership of the underlying memory region
131 : to the caller. There should be no joins in the system on the fctl.
132 : Returns a pointer to the underlying memory region. */
133 :
134 : void *
135 : fd_fctl_new( void * shmem,
136 : ulong rx_max );
137 :
138 9 : static inline fd_fctl_t * fd_fctl_join ( void * shfctl ) { return (fd_fctl_t *)shfctl; }
139 9 : static inline void * fd_fctl_leave ( fd_fctl_t * fctl ) { return (void *)fctl; }
140 9 : static inline void * fd_fctl_delete( void * shfctl ) { return shfctl; }
141 :
142 : /* fd_fctl_cfg_rx_add adds flow control details for a receiver to the
143 : given fctl. Assumes the fctl configuration is not yet complete
144 : (FIXME: CONSIDER EXPLICITLY VERIFYING THIS). Each receiver is
145 : assigned an index starting from zero sequentially as they are added.
146 : This index is used to communicate diagnostic information to the user
147 : (e.g. see fd_fctl_cr_query rx_idx_slow).
148 :
149 : cr_max is how many credits are safe for the transmitter to burst to
150 : the receiver when this receiver is fully caught up. Should be in
151 : [1,LONG_MAX] (not ULONG_MAX).
152 :
153 : seq_laddr is the location in the user's local address space where the
154 : user can query a lower bound of where this receiver is currently at
155 : in the underlying sequence space. The user is guaranteed that the
156 : receiver has processed all sequence numbers strictly before
157 : *seq_laddr. NULL is fine here (the fctl will ignore this receiver
158 : until it is set, potentially post configuration).
159 :
160 : slow_laddr is the location in the user's local address space where
161 : the fctl should accumulate statistics for which receiver is running
162 : the slowest. It is fine if other events are accumulated to this
163 : location (e.g. the user doesn't need ultra fine grained diagnostics).
164 :
165 : Returns fctl on success and NULL on failure (logs details). Reasons
166 : for failure include NULL fctl, NULL slow_laddr, too many fctl
167 : receivers, too small cr_max, too large cr_max. */
168 :
169 : fd_fctl_t *
170 : fd_fctl_cfg_rx_add( fd_fctl_t * fctl,
171 : ulong cr_max,
172 : ulong const * seq_laddr,
173 : ulong * slow_laddr );
174 :
175 : /* fd_fctl_cfg_done completes the configuration of a flow control.
176 : Assumes the fctl configuration is not yet complete (FIXME: CONSIDER
177 : EXPLICITLY VERIFYING THIS?).
178 :
179 : cr_burst is the maximum number of credits a transmitter will use in a
180 : burst (i.e. deduct from its credits available before checking that it
181 : still had credits available again, e.g. MTU for Ethernet-like
182 : protocols, MSS for a byte oriented TCP-like protocols, number of
183 : slots in a batch for slot oriented protocols like a batch of frag
184 : metadata, etc). Should be in [1,cr_burst_max] where cr_burst_max is
185 : min(rx[:].cr_max) and LONG_MAX when there are no receivers.
186 :
187 : cr_max is an upper bound of the number of credits a transmitter can
188 : have (e.g. how many credits a transmitter should get from a query
189 : when there are no active receivers). Should be in
190 : [cr_burst,LONG_MAX]. 0 indicates to pick a reasonable default for
191 : the configured receivers (e.g. cr_burst_max). This limit is mostly
192 : for dynamic configured flow control situations (e.g. it provides an
193 : upper limit to how lazy the transmitter can be with flow control
194 : operations).
195 :
196 : cr_resume is the credit threshold (in a >= sense) for the fctl to
197 : stop trying to refill credits from active receivers. Should be in
198 : [cr_burst,actual_cr_max] where actual_cr_max is the value determined
199 : above. 0 indicates to pick a reasonable default (e.g.
200 : cr_burst+floor((2/3)(actual_cr_max-cr_burst)).
201 :
202 : cr_refill is the credit threshold (in a < sense) for the fctl to
203 : start trying to refill its credits from active receivers. Should be
204 : in [cr_burst,actual_cr_resume] where actual_cr_resume is the value
205 : determined above. 0 indicates to pick a reasonable default (e.g.
206 : cr_burst+floor((1/2)(actual_cr_resume-cr_burst)).
207 :
208 : Returns fctl on success (fctl configuration will be complete on
209 : return) and NULL on failure (logs details). Reasons for failure
210 : include NULL fctl, too large cr_max, cr_resume, cr_refill.
211 :
212 : TL;DR Just say zeros for cr_{max,resume,refill} if you don't
213 : understand the above and this will usually do something reasonable. */
214 :
215 : fd_fctl_t *
216 : fd_fctl_cfg_done( fd_fctl_t * fctl,
217 : ulong cr_burst,
218 : ulong cr_max,
219 : ulong cr_resume,
220 : ulong cr_refill );
221 :
222 : /* Accessor APIs */
223 :
224 : /* fd_fctl_{rx_max,rx_cnt,
225 : cr_burst,cr_max,cr_resume,cr_refill,
226 : rx_cr_max,rx_seq_laddr,
227 : rx_seq_slow_laddr,rx_seq_slow_laddr_const}
228 : return the values configured during fctl initialization.
229 :
230 : These assume fctl is a local join to a configured fctl. rx_cr_max,
231 : rx_seq_laddr, rx_slow_laddr and rx_slow_laddr_const further assume
232 : rx_idx is in [0,rx_cnt). slow_laddr_const is a const-correct
233 : version of rx_slow_laddr.
234 :
235 : (FIXME: CONSIDER ACCESSES FOR DISTINGUISHING WHETHER CR_MAX /
236 : CR_RESUME / CR_REFILL WERE AUTOCONFIGURED. EXPOSE IN_REFILL?
237 : GET/SET RX_SEQ_LADDR DYNAMICALLY? GET/SET IN_REFILL?) */
238 :
239 3 : FD_FN_PURE static inline ulong fd_fctl_rx_max ( fd_fctl_t const * fctl ) { return (ulong)fctl->rx_max; }
240 3 : FD_FN_PURE static inline ulong fd_fctl_rx_cnt ( fd_fctl_t const * fctl ) { return (ulong)fctl->rx_cnt; }
241 12 : FD_FN_PURE static inline ulong fd_fctl_cr_burst ( fd_fctl_t const * fctl ) { return fctl->cr_burst; }
242 15 : FD_FN_PURE static inline ulong fd_fctl_cr_max ( fd_fctl_t const * fctl ) { return fctl->cr_max; }
243 15 : FD_FN_PURE static inline ulong fd_fctl_cr_resume( fd_fctl_t const * fctl ) { return fctl->cr_resume; }
244 15 : FD_FN_PURE static inline ulong fd_fctl_cr_refill( fd_fctl_t const * fctl ) { return fctl->cr_refill; }
245 :
246 : FD_FN_PURE static inline ulong
247 : fd_fctl_rx_cr_max( fd_fctl_t const * fctl,
248 18 : ulong rx_idx ) {
249 18 : return (ulong)fd_fctl_private_rx_const( fctl )[rx_idx].cr_max;
250 18 : }
251 :
252 : FD_FN_PURE static inline ulong const *
253 : fd_fctl_rx_seq_laddr( fd_fctl_t const * fctl,
254 105 : ulong rx_idx ) {
255 105 : return fd_fctl_private_rx_const( fctl )[rx_idx].seq_laddr;
256 105 : }
257 :
258 : FD_FN_PURE static inline ulong *
259 : fd_fctl_rx_slow_laddr( fd_fctl_t * fctl,
260 777 : ulong rx_idx ) {
261 777 : return fd_fctl_private_rx( fctl )[rx_idx].slow_laddr;
262 777 : }
263 :
264 : FD_FN_PURE static inline ulong const *
265 : fd_fctl_rx_slow_laddr_const( fd_fctl_t const * fctl,
266 0 : ulong rx_idx ) {
267 0 : return fd_fctl_private_rx_const( fctl )[rx_idx].slow_laddr;
268 0 : }
269 :
270 : /* fd_fctl_rx_cr_return updates users of _rx_seq flow control (e.g. from
271 : rx_seq_laddr above) the position of the receiver in sequence space
272 : (in the sense that the receiver has consumed all sequence numbers
273 : strictly before rx_seq cyclic). This should be done moderately
274 : frequently (e.g. in background housekeeping) after the receiver has
275 : moved forward in sequence space since the last update and should be
276 : monotonically non-decreasing. Even more aggressively is usually
277 : fine. This also should be done when the receiver is shutdown to
278 : facilitate cleanly restarting a consumer and what not. This also
279 : serves as a compiler memory fence to ensure credits are returned at a
280 : well defined point in the instruction stream (e.g. so that compiler
281 : doesn't move any loads that might be clobbered by the return to after
282 : the return). */
283 :
284 : static inline void
285 : fd_fctl_rx_cr_return( ulong * _rx_seq,
286 17317027 : ulong rx_seq ) {
287 17317027 : FD_COMPILER_MFENCE();
288 17317027 : FD_VOLATILE( *_rx_seq ) = rx_seq;
289 17317027 : FD_COMPILER_MFENCE();
290 17317027 : }
291 :
292 : /**********************************************************************/
293 :
294 : /* fd_fctl_cr_query returns a lower bound of the number of credits
295 : available to a transmitter that has published up to but NOT INCLUDING
296 : tx_seq. Will be in [0,cr_max]. On return, *_rx_idx_slow will
297 : contain the lowest indexed receiver that constrained the result or
298 : ULONG_MAX if the query result wasn't receiver constrained.
299 :
300 : This involves interthread communication so should be used sparingly.
301 :
302 : FIXME: DO AS A MULTIPLE RETURN MACRO TO FORCE INLINE AND AVOID
303 : RX_IDX_SLOW MEMORY USAGE? DO AS A NON-INLINED CALL? OPTIMUM
304 : PROBABLY DEPENDS ON USE CASES. */
305 :
306 : FD_FN_UNUSED static ulong /* Work around -Winline */
307 : fd_fctl_cr_query( fd_fctl_t const * fctl,
308 : ulong tx_seq,
309 31145 : ulong * _rx_idx_slow ) {
310 31145 : fd_fctl_private_rx_t const * rx = fd_fctl_private_rx_const( fctl );
311 31145 : ulong rx_cnt = (ulong)fctl->rx_cnt;
312 :
313 31145 : ulong cr_query = fctl->cr_max;
314 31145 : ulong rx_idx_slow = ULONG_MAX;
315 :
316 : /* Note: it is possible to do this in vectorized / in parallel */
317 :
318 : /* Note that the rx_cr_query calc is robust against overflow /
319 : misconfigured receivers / etc. Let delta by the number of sequence
320 : numbers that that the transmitter is ahead of the receiver and note
321 : that the calc can be written as rx_cr_query=max(rx_cr_test,0) where
322 : rx_cr_test is rx_cr_max-max(delta,0).
323 :
324 : In normal operation, delta is in [0,rx_cr_max] (i.e. the
325 : transmitter is at or ahead of the receiver but has not overrun the
326 : receiver). Then max(delta,0)==delta and is in [0,rx_cr_max]. As
327 : such, rx_cr_test is in [0,rx_cr_max] and the result is thus also in
328 : [0,rx_cr_max] (so no overflow). The result here is a lower bound
329 : of the credits the transmitter can use without overrunning the
330 : receiver.
331 :
332 : Suppose, the transmitter has or appears to have overrun the
333 : receiver (e.g. the transmitter initialized with a sequence number
334 : well ahead of the receiver). Then delta is in
335 : (rx_cr_max,LONG_MAX]. max(delta,0)==delta and is in
336 : (rx_cr_max,LONG_MAX]. As such, in exact arithmetic, rx_cr_test is
337 : in [rx_cr_max-LONG_MAX,0). But since rx_cr_max was restricted to
338 : be in [1,LONG_MAX] on initialization, this result is thus in
339 : [-LONG_MAX+1,0) and is computed without overflow (note that
340 : LONG_MIN==-LONG_MAX-1). Since this situation always produces a
341 : negative cr_test result, rx_cr_query will be capped at 0. This
342 : correctly indicates that the transmitter cannot send anything
343 : because it would exacerbate the already overrun receiver.
344 :
345 : Conversely, suppose the receiver appears to be ahead of the
346 : transmitter (e.g. the receiver was initialized with a sequence
347 : number ahead of the transmitter). Then delta is in [LONG_MIN,0).
348 : And max(delta,0)==0 such that rx_cr_test = rx_cr_query = rx_cr_max.
349 : The result here in conservative lower bound of the credits the
350 : transmitter can use. Conservative in the sense that it has been
351 : capped by rx_cr_max even though the value advertised by the
352 : receiver in principle would allow more (as a receiver getting ahead
353 : of the transmitter is a good sign of some breakage though, this
354 : situation strongly suggests the rx_seq value shouldn't be trusted
355 : such that rx_cr_max is a fallback in this case). */
356 :
357 529387 : for( ulong rx_idx=0UL; rx_idx<rx_cnt; rx_idx++ ) {
358 498242 : ulong const * _rx_seq = rx[ rx_idx ].seq_laddr;
359 498242 : if( FD_UNLIKELY( !_rx_seq ) ) continue; /* Skip inactive rx */
360 :
361 498242 : ulong rx_seq = FD_VOLATILE_CONST( *_rx_seq );
362 498242 : ulong rx_cr_query = (ulong)fd_long_max( rx[ rx_idx ].cr_max - fd_long_max( fd_seq_diff( tx_seq, rx_seq ), 0L ), 0L );
363 498242 : rx_idx_slow = fd_ulong_if( rx_cr_query<cr_query, rx_idx, rx_idx_slow );
364 498242 : cr_query = fd_ulong_min( rx_cr_query, cr_query );
365 498242 : }
366 :
367 31145 : _rx_idx_slow[0] = rx_idx_slow;
368 31145 : return cr_query;
369 31145 : }
370 :
371 : /* fd_fctl_tx_cr_update returns the new number of credits available to
372 : a transmitter given the current number of credits available to that
373 : transmitter and the transmitter's position in sequence space.
374 :
375 : The vast majority of flow control scenarios (even incredibly
376 : intricate dynamic heterogeneous multiple consumer) can typically be
377 : handled with just this one call in the transmitter's run loop.
378 :
379 : It assumes that reliable receivers are updating their position in
380 : sequence space moderately frequently, the transmitter and receivers
381 : have reasonably large credit maxes (to provide a lot of tolerance for
382 : temporary receiver lagging and enough flexibility to keep the vast
383 : majority of flow credit management out of the critical path) and that
384 : reliable receivers asymptotically can keep up with the transmitter on
385 : average (which is implied anyway as otherwise the overall system is
386 : inherently doomed).
387 :
388 : What a flow control credit means depends on usage. Typical cases
389 : number of bytes for TCP streaming like protocols, number of packet
390 : slots for more bounded size packet oriented protocols, etc.) Each
391 : flow control credit is good for the transmitter to consume exactly
392 : one sequence number.
393 :
394 : Typically, on initialization / after first housekeeping, the
395 : transmitter has a huge number of flow control credits.
396 :
397 : Usually, while the transmitter is consuming these credits, it is not
398 : bothering the receivers at all.
399 :
400 : When the transmitter gets to below around ~(1/3) cr_max of credits
401 : available, the fctl will query receivers on its behalf to refresh the
402 : number of credits available. If all is well (receivers are staying
403 : close to caught up with the transmitters ... typically within (1/3)
404 : cr_max of the transmitter), this query quickly returns and the
405 : transmitter will be back to having a huge number of credits. Since
406 : there is lots of margin between credit exhaustion and the start of
407 : trying to refill, this querying doesn't need to be in transmitter's
408 : critical path.
409 :
410 : As such, when receivers are keeping up, fctl will only rarely do
411 : rx->tx communications (roughly every ~2/3 cr_max sequence numbers
412 : sent). And, when receivers aren't keeping up, having the actual
413 : update call running asynchronously out of the critical path (e.g.
414 : in a housekeeping loop) prevents the fctl from flooding the system
415 : with tons of flow control communications. This can further degrade
416 : the performance of already slow receivers, exacerbating the slow down
417 : and causing congestion collapse like issues on modern CPU NOCs.
418 :
419 : Related, using distinct and well separated refill and resume
420 : thresholds prevents performance degrading start / stop situations
421 : (e.g. if resume and refill thresholds are very close, the situation
422 : can arise where the fctl falls below, triggers a query, the receivers
423 : are just above, triggering another round of flow control
424 : communications shortly thereafter and another and another ...). This
425 : behavior can cause a big degradation in system performance due to the
426 : same flow control communications flooding. The typical spacing here
427 : guarantees that transmitter only resume sending once it is sure it
428 : will be able to send ~1/3 cr_max sequence numbers before querying the
429 : receivers again.
430 :
431 : Analogously, receives don't need to do flow control sequence number
432 : updates in their critical paths. But, even if done there, because it
433 : is infrequently queried by the transmitter, it typically will be an
434 : ultra fast L1 store cache hit with minimal performance implications.
435 :
436 : TL;DR
437 :
438 : Example reliable receiver init:
439 :
440 : ...
441 : ulong * fctl_seq = ... location in receivers address space of receiver flow control (ideally near the receiver)
442 : ...
443 : ulong rx_seq = ... next sequence number this receiver expects from the transmitter
444 : ...
445 :
446 : Example reliable receiver run loop (lots of variants with different
447 : tradeoffs possible):
448 :
449 : ...
450 : if( ... time for housekeeping ... ) {
451 : ...
452 : FD_VOLATILE( fctl_seq[0] ) = rx_seq; // Update the transmitter and monitors where we are at
453 : // It is fine to be quite aggressive about this as this is
454 : // should be a L1 cache hit store the vast majority of the time.
455 : ...
456 : }
457 : ...
458 :
459 : rx_cnt = ... receive from transmitter starting at rx_seq
460 :
461 : // At this point, we just received [rx_seq,rx_seq+rx_cnt). Process
462 : // these. When we are ready, tell the transmitter we are fine for
463 : // the transmitter to move on.
464 :
465 : rx_seq = fd_seq_inc( rx_seq, rx_cnt );
466 :
467 : ...
468 :
469 : Example transmitter init:
470 :
471 : ...
472 : fd_fctl_t * fctl = ... setup flow control for all reliable receivers ...
473 : ...
474 : ulong tx_seq = ... first sequence number of the next transmission ...
475 : ...
476 : ulong cr_avail = 0UL; // Will learn this on the first housekeeping
477 : ...
478 :
479 : Example transmitter run loop (lots of variants with different
480 : tradeoffs possible):
481 :
482 : ...
483 : if( ... time for housekeeping ... ) {
484 : ...
485 : cr_avail = fd_fctl_cr_update( fctl, cr_avail, tx_seq );
486 : ...
487 : }
488 : ...
489 :
490 : // If we don't have enough credits to handle the the size of a
491 : // worst case burst, wait while still doing housekeeping in the
492 : // background to keep flow control credits flowing and keep
493 : // monitoring / command-and-control operating.
494 :
495 : if( FD_UNLIKELY( cr_avail < cr_burst ) continue;
496 :
497 : ...
498 : ulong tx_cnt = ... determine how much we are ready to send
499 : ... will be in [0,cr_burst]
500 : ... send [tx_seq,tx_seq+tx_seq_cnt) to receivers
501 : tx_seq = fd_seq_inc( tx_seq, tx_cnt );
502 : cr_avail -= tx_cnt; // guaranteed not to underflow
503 : ...
504 : */
505 :
506 : static inline ulong
507 : fd_fctl_tx_cr_update( fd_fctl_t * fctl,
508 : ulong cr_avail,
509 36127 : ulong tx_seq ) {
510 :
511 36127 : int in_refill = fctl->in_refill;
512 :
513 36127 : if( FD_UNLIKELY( (cr_avail<fctl->cr_refill) | in_refill ) ) { /* Yes, strictly "<" */
514 :
515 : /* The number of credits available has just dropped below the
516 : transmitters refill threshold (such that we should enter the
517 : refilling state) or the transmitter is already in the refilling
518 : state ... query the receivers for the number of credits that
519 : might be available. */
520 :
521 31142 : ulong rx_idx_slow;
522 31142 : ulong cr_query = fd_fctl_cr_query( fctl, tx_seq, &rx_idx_slow );
523 :
524 31142 : if( FD_LIKELY( cr_query>=fctl->cr_resume ) ) { /* Yes, strictly ">=" */
525 :
526 : /* We got enough credits to resume. Update the credits available
527 : and exit the refilling state. */
528 :
529 260 : fctl->in_refill = 0;
530 260 : cr_avail = cr_query;
531 :
532 30882 : } else if( FD_LIKELY( !in_refill ) ) {
533 :
534 : /* We didn't get enough credits to resume and we are just entering
535 : the refilling state. Attribute this event to the slowest
536 : receiver (i.e. rx_idx_low is likely limiting system performance
537 : / potentially causing backpressure / etc). We don't bother
538 : with a proper atomic increment as this is just diagnostic;
539 : exact precision is not required for correctness (also, if there
540 : is no multiplexing of the rx's slow counter with counters on
541 : different threads, as is often the case, it will be exact
542 : anyway). Note that because the refilling is typically
543 : triggered well before the transmitter is actual not able to
544 : send, this counter should be more thought of as a "rx_idx_slow
545 : might be a source of stalls on the transmitter" as opposed to
546 : "rx_idx_slow caused a stall". */
547 :
548 263 : if( FD_LIKELY( rx_idx_slow!=ULONG_MAX ) ) {
549 263 : ulong * slow = fd_fctl_private_rx( fctl )[ rx_idx_slow ].slow_laddr;
550 263 : FD_COMPILER_MFENCE();
551 263 : slow[0] += 1UL;
552 263 : FD_COMPILER_MFENCE();
553 263 : }
554 263 : fctl->in_refill = 1;
555 :
556 263 : } /* else {
557 :
558 : // We didn't get enough credits to resume and we were already
559 : // in the refilling state. So, nothing to update.
560 :
561 : } */
562 :
563 31142 : }
564 :
565 36127 : return cr_avail;
566 36127 : }
567 :
568 : FD_PROTOTYPES_END
569 :
570 : #endif /* HEADER_fd_src_tango_fctl_fd_fctl_h */
|