Line data Source code
1 : #include "fd_topob.h"
2 :
3 : #include "../../util/pod/fd_pod_format.h"
4 : #include "fd_cpu_topo.h"
5 :
6 : #define SET_NAME cpu_bv
7 : #define SET_MAX FD_TILE_MAX
8 : #include "../../util/tmpl/fd_set.c"
9 :
10 : fd_topo_t *
11 : fd_topob_new( void * mem,
12 3 : char const * app_name ) {
13 3 : fd_topo_t * topo = (fd_topo_t *)mem;
14 :
15 3 : if( FD_UNLIKELY( !topo ) ) {
16 0 : FD_LOG_WARNING( ( "NULL topo" ) );
17 0 : return NULL;
18 0 : }
19 :
20 3 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)topo, alignof(fd_topo_t) ) ) ) {
21 0 : FD_LOG_WARNING( ( "misaligned topo" ) );
22 0 : return NULL;
23 0 : }
24 :
25 3 : fd_memset( topo, 0, sizeof(fd_topo_t) );
26 :
27 3 : FD_TEST( fd_pod_new( topo->props, sizeof(topo->props) ) );
28 :
29 3 : if( FD_UNLIKELY( strlen( app_name )>=sizeof(topo->app_name) ) ) FD_LOG_ERR(( "app_name too long: %s", app_name ));
30 3 : strncpy( topo->app_name, app_name, sizeof(topo->app_name) );
31 :
32 3 : topo->max_page_size = FD_SHMEM_GIGANTIC_PAGE_SZ;
33 3 : topo->gigantic_page_threshold = 4 * FD_SHMEM_HUGE_PAGE_SZ;
34 :
35 3 : topo->agave_affinity_cnt = 0;
36 3 : topo->blocklist_cores_cnt = 0;
37 :
38 3 : return topo;
39 3 : }
40 :
41 : fd_topo_wksp_t *
42 : fd_topob_wksp( fd_topo_t * topo,
43 6 : char const * name ) {
44 6 : if( FD_UNLIKELY( !topo || !name || !strlen( name ) ) ) FD_LOG_ERR(( "NULL args" ));
45 6 : if( FD_UNLIKELY( strlen( name )>=sizeof(topo->workspaces[ topo->wksp_cnt ].name ) ) ) FD_LOG_ERR(( "wksp name too long: %s", name ));
46 6 : if( FD_UNLIKELY( topo->wksp_cnt>=FD_TOPO_MAX_WKSPS ) ) FD_LOG_ERR(( "too many workspaces" ));
47 :
48 6 : fd_topo_wksp_t * wksp = &topo->workspaces[ topo->wksp_cnt ];
49 6 : strncpy( wksp->name, name, sizeof(wksp->name) );
50 6 : wksp->id = topo->wksp_cnt;
51 6 : wksp->core_dump_level = FD_TOPO_CORE_DUMP_LEVEL_REGULAR;
52 6 : topo->wksp_cnt++;
53 6 : return wksp;
54 6 : }
55 :
56 : fd_topo_obj_t *
57 : fd_topob_obj( fd_topo_t * topo,
58 : char const * obj_name,
59 66 : char const * wksp_name ) {
60 66 : if( FD_UNLIKELY( !topo || !obj_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
61 66 : if( FD_UNLIKELY( strlen( obj_name )>=sizeof(topo->objs[ topo->obj_cnt ].name ) ) ) FD_LOG_ERR(( "obj name too long: %s", obj_name ));
62 66 : if( FD_UNLIKELY( topo->obj_cnt>=FD_TOPO_MAX_OBJS ) ) FD_LOG_ERR(( "too many objects" ));
63 :
64 66 : ulong wksp_id = fd_topo_find_wksp( topo, wksp_name );
65 66 : if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "workspace not found: %s", wksp_name ));
66 :
67 66 : fd_topo_obj_t * obj = &topo->objs[ topo->obj_cnt ];
68 66 : memset( obj, 0, sizeof(fd_topo_obj_t) );
69 66 : strncpy( obj->name, obj_name, sizeof(obj->name) );
70 66 : obj->id = topo->obj_cnt;
71 66 : obj->wksp_id = wksp_id;
72 66 : obj->label_idx = ULONG_MAX;
73 66 : topo->obj_cnt++;
74 :
75 66 : return obj;
76 66 : }
77 :
78 : fd_topo_obj_t *
79 : fd_topob_obj_named( fd_topo_t * topo,
80 : char const * obj_type,
81 : char const * wksp_name,
82 0 : char const * label ) {
83 0 : if( FD_UNLIKELY( !label ) ) FD_LOG_ERR(( "NULL args" ));
84 0 : if( FD_UNLIKELY( strlen( label )>=sizeof(topo->objs[ topo->obj_cnt ].label ) ) ) FD_LOG_ERR(( "obj label too long: %s", label ));
85 0 : fd_topo_obj_t * obj = fd_topob_obj( topo, obj_type, wksp_name );
86 0 : if( FD_UNLIKELY( !obj ) ) return NULL;
87 :
88 0 : fd_cstr_ncpy( obj->label, label, sizeof(obj->label) );
89 0 : obj->label_idx = fd_topo_obj_cnt( topo, obj_type, label );
90 :
91 0 : return obj;
92 0 : }
93 :
94 : fd_topo_link_t *
95 : fd_topob_link( fd_topo_t * topo,
96 : char const * link_name,
97 : char const * wksp_name,
98 : ulong depth,
99 : ulong mtu,
100 18 : ulong burst ) {
101 18 : if( FD_UNLIKELY( !topo || !link_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
102 18 : if( FD_UNLIKELY( strlen( link_name )>=sizeof(topo->links[ topo->link_cnt ].name ) ) ) FD_LOG_ERR(( "link name too long: %s", link_name ));
103 18 : if( FD_UNLIKELY( topo->link_cnt>=FD_TOPO_MAX_LINKS ) ) FD_LOG_ERR(( "too many links" ));
104 :
105 18 : ulong kind_id = 0UL;
106 39 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
107 21 : if( !strcmp( topo->links[ i ].name, link_name ) ) kind_id++;
108 21 : }
109 :
110 18 : fd_topo_link_t * link = &topo->links[ topo->link_cnt ];
111 18 : strncpy( link->name, link_name, sizeof(link->name) );
112 18 : link->id = topo->link_cnt;
113 18 : link->kind_id = kind_id;
114 18 : link->depth = depth;
115 18 : link->mtu = mtu;
116 18 : link->burst = burst;
117 :
118 18 : fd_topo_obj_t * obj = fd_topob_obj( topo, "mcache", wksp_name );
119 18 : link->mcache_obj_id = obj->id;
120 18 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
121 :
122 18 : if( mtu ) {
123 6 : obj = fd_topob_obj( topo, "dcache", wksp_name );
124 6 : link->dcache_obj_id = obj->id;
125 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
126 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, burst, "obj.%lu.burst", obj->id ) );
127 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, mtu, "obj.%lu.mtu", obj->id ) );
128 6 : }
129 18 : topo->link_cnt++;
130 :
131 18 : return link;
132 18 : }
133 :
134 : void
135 : fd_topob_tile_uses( fd_topo_t * topo,
136 : fd_topo_tile_t * tile,
137 : fd_topo_obj_t const * obj,
138 51 : int mode ) {
139 51 : (void)topo;
140 :
141 51 : if( FD_UNLIKELY( tile->uses_obj_cnt>=FD_TOPO_MAX_TILE_OBJS ) ) FD_LOG_ERR(( "tile `%s` uses too many objects", tile->name ));
142 :
143 51 : tile->uses_obj_id[ tile->uses_obj_cnt ] = obj->id;
144 51 : tile->uses_obj_mode[ tile->uses_obj_cnt ] = mode;
145 51 : tile->uses_obj_cnt++;
146 51 : }
147 :
148 : fd_topo_tile_t *
149 : fd_topob_tile( fd_topo_t * topo,
150 : char const * tile_name,
151 : char const * tile_wksp,
152 : char const * metrics_wksp,
153 : ulong cpu_idx,
154 : int is_agave,
155 : int uses_id_keyswitch,
156 6 : int uses_av_keyswitch ) {
157 :
158 6 : if( FD_UNLIKELY( !topo || !tile_name || !tile_wksp || !metrics_wksp ) ) FD_LOG_ERR(( "NULL args" ));
159 6 : if( FD_UNLIKELY( strlen( tile_name )>=sizeof(topo->tiles[ topo->tile_cnt ].name ) ) ) FD_LOG_ERR(( "tile name too long: %s", tile_name ));
160 6 : if( FD_UNLIKELY( topo->tile_cnt>=FD_TOPO_MAX_TILES ) ) FD_LOG_ERR(( "too many tiles %lu", topo->tile_cnt ));
161 :
162 6 : ulong kind_id = 0UL;
163 6 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
164 0 : if( !strcmp( topo->tiles[ i ].name, tile_name ) ) kind_id++;
165 0 : }
166 :
167 6 : fd_topo_tile_t * tile = &topo->tiles[ topo->tile_cnt ];
168 6 : strncpy( tile->name, tile_name, sizeof(tile->name) );
169 6 : tile->id = topo->tile_cnt;
170 6 : tile->kind_id = kind_id;
171 6 : tile->is_agave = is_agave;
172 6 : tile->cpu_idx = cpu_idx;
173 6 : tile->in_cnt = 0UL;
174 6 : tile->out_cnt = 0UL;
175 6 : tile->event_link_id = ULONG_MAX;
176 6 : tile->uses_obj_cnt = 0UL;
177 :
178 6 : fd_topo_obj_t * tile_obj = fd_topob_obj( topo, "tile", tile_wksp );
179 6 : tile->tile_obj_id = tile_obj->id;
180 6 : fd_topob_tile_uses( topo, tile, tile_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
181 :
182 6 : fd_topo_obj_t * obj = fd_topob_obj( topo, "metrics", metrics_wksp );
183 6 : tile->metrics_obj_id = obj->id;
184 6 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
185 :
186 6 : if( FD_LIKELY( uses_id_keyswitch ) ) {
187 0 : obj = fd_topob_obj( topo, "keyswitch", tile_wksp );
188 0 : tile->id_keyswitch_obj_id = obj->id;
189 0 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
190 6 : } else {
191 6 : tile->id_keyswitch_obj_id = ULONG_MAX;
192 6 : }
193 :
194 6 : if( FD_UNLIKELY( uses_av_keyswitch ) ) {
195 0 : obj = fd_topob_obj( topo, "keyswitch", tile_wksp );
196 0 : tile->av_keyswitch_obj_id = obj->id;
197 0 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
198 6 : } else {
199 6 : tile->av_keyswitch_obj_id = ULONG_MAX;
200 6 : }
201 :
202 6 : topo->tile_cnt++;
203 6 : return tile;
204 6 : }
205 :
206 : void
207 : fd_topob_tile_in( fd_topo_t * topo,
208 : char const * tile_name,
209 : ulong tile_kind_id,
210 : char const * fseq_wksp,
211 : char const * link_name,
212 : ulong link_kind_id,
213 : int reliable,
214 15 : int polled ) {
215 15 : if( FD_UNLIKELY( !topo || !tile_name || !fseq_wksp || !link_name ) ) FD_LOG_ERR(( "NULL args" ));
216 :
217 15 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
218 15 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
219 15 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
220 :
221 15 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
222 15 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
223 15 : fd_topo_link_t * link = &topo->links[ link_id ];
224 :
225 15 : if( FD_UNLIKELY( tile->in_cnt>=FD_TOPO_MAX_TILE_IN_LINKS ) ) FD_LOG_ERR(( "too many in links: %s:%lu", tile_name, tile_kind_id ) );
226 15 : tile->in_link_id[ tile->in_cnt ] = link->id;
227 15 : tile->in_link_reliable[ tile->in_cnt ] = reliable;
228 15 : tile->in_link_poll[ tile->in_cnt ] = polled;
229 15 : fd_topo_obj_t * obj = fd_topob_obj( topo, "fseq", fseq_wksp );
230 15 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
231 15 : tile->in_link_fseq_obj_id[ tile->in_cnt ] = obj->id;
232 15 : tile->in_cnt++;
233 :
234 15 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
235 15 : if( FD_LIKELY( link->mtu ) ) {
236 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
237 3 : }
238 15 : }
239 :
240 : void
241 : fd_topob_tile_out( fd_topo_t * topo,
242 : char const * tile_name,
243 : ulong tile_kind_id,
244 : char const * link_name,
245 3 : ulong link_kind_id ) {
246 3 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
247 3 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
248 3 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
249 :
250 3 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
251 3 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
252 3 : fd_topo_link_t * link = &topo->links[ link_id ];
253 :
254 3 : if( FD_UNLIKELY( tile->out_cnt>=FD_TOPO_MAX_TILE_OUT_LINKS ) ) FD_LOG_ERR(( "too many out links: %s", tile_name ));
255 3 : tile->out_link_id[ tile->out_cnt ] = link->id;
256 3 : tile->out_cnt++;
257 :
258 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
259 3 : if( FD_LIKELY( link->mtu ) ) {
260 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
261 3 : }
262 3 : }
263 :
264 : static void
265 0 : validate( fd_topo_t const * topo ) {
266 : /* Objects have valid wksp_ids */
267 0 : for( ulong i=0UL; i<topo->obj_cnt; i++ ) {
268 0 : if( FD_UNLIKELY( topo->objs[ i ].wksp_id>=topo->wksp_cnt ) )
269 0 : FD_LOG_ERR(( "invalid workspace id %lu", topo->objs[ i ].wksp_id ));
270 0 : }
271 :
272 : /* Tile ins are valid */
273 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
274 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
275 0 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ]>=topo->link_cnt ) )
276 0 : FD_LOG_ERR(( "tile %lu (%s) has invalid in link %lu", i, topo->tiles[ i ].name, topo->tiles[ i ].in_link_id[ j ] ));
277 0 : }
278 0 : }
279 :
280 : /* Tile does not have duplicated ins */
281 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
282 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
283 0 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
284 0 : if( FD_UNLIKELY( j==k ) ) continue;
285 0 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
286 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated in link %lu (%s)", i, topo->tiles[ i ].name,
287 0 : topo->tiles[ i ].in_link_id[ j ], topo->links[ topo->tiles[ i ].in_link_id[ j ] ].name ));
288 0 : }
289 0 : }
290 0 : }
291 :
292 : /* Tile does not have duplicated outs */
293 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
294 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
295 0 : for( ulong k=0UL; k<topo->tiles[ i ].out_cnt; k++ ) {
296 0 : if( FD_UNLIKELY( j==k ) ) continue;
297 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].out_link_id[ k ] ) )
298 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated out link %lu (%s)", i, topo->tiles[ i ].name,
299 0 : topo->tiles[ i ].out_link_id[ j ], topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name ));
300 0 : }
301 0 : }
302 0 : }
303 :
304 : /* Tile outs are different than ins */
305 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
306 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
307 0 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
308 0 : char const * link_name = topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name;
309 : /* PoH tile "publishes" this on behalf of Agave, so it's not
310 : a real circular link. */
311 0 : if( FD_UNLIKELY( !strcmp( link_name, "stake_out" ) ||
312 0 : !strcmp( link_name, "crds_shred" ) ) ) continue;
313 :
314 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
315 0 : FD_LOG_ERR(( "tile %lu has out link %lu same as in", i, topo->tiles[ i ].out_link_id[ j ] ));
316 0 : }
317 0 : }
318 0 : }
319 :
320 : /* Non polling tile ins are also not reliable */
321 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
322 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
323 0 : if( FD_UNLIKELY( !topo->tiles[ i ].in_link_poll[ j ] && topo->tiles[ i ].in_link_reliable[ j ] ) )
324 0 : FD_LOG_ERR(( "tile %lu has in link %lu which is not polled but reliable", i, topo->tiles[ i ].in_link_id[ j ] ));
325 0 : }
326 0 : }
327 :
328 : /* Tile outs are valid */
329 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
330 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
331 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] >= topo->link_cnt ) )
332 0 : FD_LOG_ERR(( "tile %lu has invalid out link %lu", i, topo->tiles[ i ].out_link_id[ j ] ));
333 0 : }
334 0 : }
335 :
336 : /* Workspace names are unique */
337 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
338 0 : for( ulong j=0UL; j<topo->wksp_cnt; j++ ) {
339 0 : if( FD_UNLIKELY( i==j ) ) continue;
340 0 : if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, topo->workspaces[ j ].name ) ) )
341 0 : FD_LOG_ERR(( "duplicate workspace name %s", topo->workspaces[ i ].name ));
342 0 : }
343 0 : }
344 :
345 : /* Each workspace is identified correctly */
346 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
347 0 : if( FD_UNLIKELY( topo->workspaces[ i ].id != i ) )
348 0 : FD_LOG_ERR(( "workspace %lu has id %lu", i, topo->workspaces[ i ].id ));
349 0 : }
350 :
351 : /* Each link has exactly one producer */
352 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
353 0 : ulong producer_cnt = 0;
354 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
355 0 : for( ulong k=0UL; k<topo->tiles[ j ].out_cnt; k++ ) {
356 0 : if( topo->tiles[ j ].out_link_id[ k ]==i ) producer_cnt++;
357 0 : }
358 0 : }
359 0 : if( FD_UNLIKELY( producer_cnt>1UL || ( producer_cnt==0UL && !topo->links[ i ].permit_no_producers ) ) )
360 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has %lu producers", i, topo->links[ i ].name, topo->links[ i ].kind_id, producer_cnt ));
361 0 : }
362 :
363 : /* Each link has at least one consumer */
364 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
365 0 : ulong cnt = fd_topo_link_consumer_cnt( topo, &topo->links[ i ] );
366 0 : if( FD_UNLIKELY( cnt < 1UL && !topo->links[ i ].permit_no_consumers ) ) {
367 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has 0 consumers", i, topo->links[ i ].name, topo->links[ i ].kind_id ));
368 0 : }
369 0 : }
370 0 : }
371 :
/* FLOATING is the NULL terminated list of tile names that yield to the
   kernel scheduler.  The last two entries ("genesi", "ipecho") are
   FIREDANCER only. */
static char const * FLOATING[] = {
  "netlnk", "metric", "diag", "bencho",
  "genesi", "ipecho",
  NULL
};
382 :
/* STARTUP is the NULL terminated list of tile names that are only
   active on startup (they must shut down after snapshot load).  All
   entries are FIREDANCER only.  Entry order is significant to code
   that walks the list in sequence. */
static char const * STARTUP[] = {
  "genesi",
  "snapct", "snapld", "snapdc", "snapin", "snapwr",
  NULL
};
394 :
/* POST_START is the NULL terminated list of tile names that are only
   active post startup (they must sleep until snapshot load finishes).
   All entries are FIREDANCER only.  Entry order is significant to
   code that walks the list in sequence. */
static char const * POST_START[] = {
  "accdb", "execle", "poh", "execrp", "txsend",
  NULL
};
405 :
/* ALWAYS is the NULL terminated list of tile names that are always
   active.  Entry order is significant to code that walks the list in
   sequence.  Entries without a remark apply to both FRANK and
   FIREDANCER. */
static char const * ALWAYS[] = {
  "backt",
  "benchg",
  "benchs",
  "net",
  "sock",
  "quic",
  "bundle",
  "verify",
  "dedup",
  "resolh",   /* FRANK only      */
  "resolv",   /* FIREDANCER only */
  "pack",
  "bank",     /* FRANK only      */
  "pohh",     /* FRANK only      */
  "sign",
  "shred",
  "event",    /* FIREDANCER only */
  "store",    /* FRANK only      */
  "plugin",   /* FRANK only      */
  "gui",      /* FIREDANCER only */
  "guih",     /* FRANK only      */
  "rpc",      /* FIREDANCER only */
  "gossvf",   /* FIREDANCER only */
  "gossip",   /* FIREDANCER only */
  "repair",   /* FIREDANCER only */
  "rserve",   /* FIREDANCER only */
  "replay",   /* FIREDANCER only */
  "tower",    /* FIREDANCER only */
  "pktgen",
  "forkt",    /* FIREDANCER only */
  NULL
};
440 :
/* CRITICAL_TILES is the NULL terminated list of tile names that should
   not have a SMT neighbor. */
static char const * CRITICAL_TILES[] = {
  "pack", "poh", "pohh", "gui", "guih",
  NULL
};
450 :
451 : int
452 0 : fd_topob_tile_priority_type( char const * name ) {
453 0 : for( char const ** p = FLOATING; *p; p++ ) {
454 0 : if( !strcmp( name, *p ) ) return FD_TOPOB_PRIORITY_FLOATING;
455 0 : }
456 0 : for( char const ** p = STARTUP; *p; p++ ) {
457 0 : if( !strcmp( name, *p ) ) return FD_TOPOB_PRIORITY_STARTUP;
458 0 : }
459 0 : for( char const ** p = CRITICAL_TILES; *p; p++ ) {
460 0 : if( !strcmp( name, *p ) ) return FD_TOPOB_PRIORITY_CRITICAL;
461 0 : }
462 0 : for( char const ** p = POST_START; *p; p++ ) {
463 0 : if( !strcmp( name, *p ) ) return FD_TOPOB_PRIORITY_NORMAL;
464 0 : }
465 0 : for( char const ** p = ALWAYS; *p; p++ ) {
466 0 : if( !strcmp( name, *p ) ) return FD_TOPOB_PRIORITY_NORMAL;
467 0 : }
468 0 : return FD_TOPOB_PRIORITY_FLOATING;
469 0 : }
470 :
471 : static void
472 : auto_tile_cpu( fd_topo_tile_t * tile,
473 : fd_topo_cpus_t * cpus,
474 : ulong * cpu_idx_p,
475 : cpu_bv_t cpu_assigned[ static cpu_bv_word_cnt ],
476 : ushort const cpu_ordering[ static FD_TILE_MAX ],
477 1131 : _Bool skip_ht_pairs ) {
478 1131 : ulong cpu_idx = *cpu_idx_p;
479 :
480 1131 : ulong cpu_cnt = cpus->cpu_cnt;
481 2934 : while( cpu_idx<cpu_cnt && cpu_bv_test( cpu_assigned, cpu_ordering[ cpu_idx ] ) ) cpu_idx++;
482 1131 : if( FD_UNLIKELY( cpu_idx>=cpu_cnt ) ) {
483 0 : FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned", tile->name, tile->kind_id ));
484 0 : }
485 :
486 : /* Certain tiles are latency and throughput critical and
487 : should not get a HT pair assigned. */
488 1131 : fd_topo_cpu_t const * cpu = &cpus->cpu[ cpu_ordering[ cpu_idx ] ];
489 :
490 1131 : int is_ht_critical = 0;
491 1131 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
492 6393 : for( char const ** p = CRITICAL_TILES; *p; p++ ) {
493 5379 : if( !strcmp( tile->name, *p ) ) {
494 117 : is_ht_critical = 1;
495 117 : break;
496 117 : }
497 5379 : }
498 1131 : }
499 :
500 1131 : if( FD_UNLIKELY( is_ht_critical || skip_ht_pairs ) ) {
501 657 : ulong try_assign = cpu_idx;
502 672 : while( cpu_bv_test( cpu_assigned, cpu_ordering[ try_assign ] ) ||
503 672 : ( cpus->cpu[ cpu_ordering[ try_assign ] ].sibling!=ULONG_MAX &&
504 672 : cpu_bv_test( cpu_assigned, cpus->cpu[ cpu_ordering[ try_assign ] ].sibling ) ) ) {
505 15 : try_assign++;
506 15 : if( FD_UNLIKELY( try_assign>=cpus->cpu_cnt ) ) FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned or have a HT pair assigned", tile->name, tile->kind_id ));
507 15 : }
508 :
509 657 : ulong sibling = cpus->cpu[ cpu_ordering[ try_assign ] ].sibling;
510 657 : cpu_bv_insert( cpu_assigned, cpu_ordering[ try_assign ] );
511 657 : if( sibling!=ULONG_MAX ) {
512 657 : cpu_bv_insert( cpu_assigned, sibling );
513 657 : }
514 657 : tile->cpu_idx = cpu_ordering[ try_assign ];
515 657 : } else {
516 474 : cpu_bv_insert( cpu_assigned, cpu_ordering[ cpu_idx ] );
517 474 : tile->cpu_idx = cpu_ordering[ cpu_idx ];
518 474 : }
519 :
520 1131 : *cpu_idx_p = cpu_idx;
521 1131 : }
522 :
523 : void
524 : fd_topob_auto_layout_cpus( fd_topo_t * topo,
525 : fd_topo_cpus_t * cpus,
526 39 : int reserve_agave_cores ) {
527 : /* Incredibly simple automatic layout system for now ... just assign
528 : tiles to CPU cores in NUMA sequential order, except for a few tiles
529 : which should be floating. */
530 :
531 1308 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
532 1269 : fd_topo_tile_t * tile = &topo->tiles[ i ];
533 1269 : tile->cpu_idx = ULONG_MAX;
534 1269 : }
535 :
536 39 : ushort cpu_ordering[ FD_TILE_MAX ] = {0};
537 39 : cpu_bv_t pairs_assigned[ cpu_bv_word_cnt ]; cpu_bv_new( pairs_assigned );
538 39 : FD_STATIC_ASSERT( FD_TILE_MAX<=USHORT_MAX, layout );
539 :
540 39 : ulong next_cpu_idx = 0UL;
541 78 : for( ulong i=0UL; i<cpus->numa_node_cnt; i++ ) {
542 4167 : for( ulong j=0UL; j<cpus->cpu_cnt; j++ ) {
543 4128 : fd_topo_cpu_t * cpu = &cpus->cpu[ j ];
544 :
545 4128 : if( FD_UNLIKELY( cpu_bv_test( pairs_assigned, j ) || cpu->numa_node!=i ) ) continue;
546 :
547 2064 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
548 2064 : cpu_ordering[ next_cpu_idx++ ] = (ushort)j;
549 :
550 2064 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
551 : /* If the CPU has a HT pair, place it immediately after so they
552 : are sequentially assigned. */
553 2064 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
554 2064 : cpu_ordering[ next_cpu_idx++ ] = (ushort)cpu->sibling;
555 2064 : cpu_bv_insert( pairs_assigned, cpu->sibling );
556 2064 : }
557 2064 : }
558 39 : }
559 :
560 39 : FD_TEST( next_cpu_idx==cpus->cpu_cnt );
561 :
562 : /* excluded cpus are simply considered already assigned */
563 39 : cpu_bv_t cpu_assigned[ cpu_bv_word_cnt ];
564 39 : cpu_bv_new( cpu_assigned );
565 123 : for( ulong i=0UL; i<topo->blocklist_cores_cnt; i++ ) {
566 84 : FD_TEST( topo->blocklist_cores_cpu_idx[ i ]<FD_TILE_MAX );
567 84 : cpu_bv_insert( cpu_assigned, topo->blocklist_cores_cpu_idx[ i ] );
568 84 : }
569 :
570 : /* Compute total number of available physical cores */
571 39 : ulong available_physical = 0UL;
572 4167 : for( ulong i=0UL; i<cpus->cpu_cnt; i++ ) {
573 4128 : if( !cpu_bv_test( cpu_assigned, i ) &&
574 4128 : !cpu_bv_test( pairs_assigned, i ) &&
575 4128 : ( cpus->cpu[ i ].sibling==ULONG_MAX ||
576 2022 : !cpu_bv_test( cpu_assigned, cpus->cpu[ i ].sibling ) ) ) {
577 2022 : available_physical++;
578 2022 : }
579 4128 : }
580 :
581 : /* Compute total number of tiles that need assignment */
582 39 : ulong always_tiles_to_assign = 0UL;
583 39 : ulong post_start_tiles_to_assign = 0UL;
584 39 : ulong startup_tiles_to_assign = 0UL;
585 1308 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
586 6978 : for( char const ** p = POST_START; *p; p++ ) {
587 5985 : if( !strcmp( topo->tiles[ j ].name, *p ) ) {
588 276 : post_start_tiles_to_assign++;
589 276 : break;
590 276 : }
591 5985 : }
592 24351 : for( char const ** p = ALWAYS; *p; p++ ) {
593 23916 : if( !strcmp( topo->tiles[ j ].name, *p ) ) {
594 834 : always_tiles_to_assign++;
595 834 : break;
596 834 : }
597 23916 : }
598 8757 : for( char const ** p = STARTUP; *p; p++ ) {
599 7509 : if( !strcmp( topo->tiles[ j ].name, *p ) ) {
600 21 : startup_tiles_to_assign++;
601 21 : break;
602 21 : }
603 7509 : }
604 1269 : }
605 39 : ulong tiles_to_assign = always_tiles_to_assign +
606 39 : fd_ulong_max( startup_tiles_to_assign, post_start_tiles_to_assign );
607 :
608 : /* If we have enough physical cores (excluding HT siblings) for all
609 : tiles that need assignment, exclude HT siblings so that no tile
610 : gets scheduled on a hyperthread pair.
611 : For Frankendancer, we reserve 2x cores so we have enough for Agave */
612 39 : _Bool skip_ht_pairs = reserve_agave_cores
613 39 : ? (available_physical>=2*tiles_to_assign) /* Frankendancer */
614 39 : : (available_physical>=tiles_to_assign); /* Firedancer */
615 :
616 : /* First, assign always-on tiles */
617 39 : ulong cpu_idx = 0UL;
618 1209 : for( char const ** p = ALWAYS; *p; p++ ) {
619 39240 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
620 38070 : fd_topo_tile_t * tile = &topo->tiles[ j ];
621 38070 : if( !strcmp( tile->name, *p ) ) {
622 834 : auto_tile_cpu( tile, cpus, &cpu_idx, cpu_assigned, cpu_ordering, skip_ht_pairs );
623 834 : }
624 38070 : }
625 1170 : }
626 39 : ulong cpu_idx_startup = cpu_idx;
627 39 : cpu_bv_t cpu_assigned_startup[ cpu_bv_word_cnt ];
628 39 : cpu_bv_copy( cpu_assigned_startup, cpu_assigned );
629 :
630 : /* Separately assign startup and post-start tiles */
631 273 : for( char const ** p = STARTUP; *p; p++ ) {
632 7848 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
633 7614 : fd_topo_tile_t * tile = &topo->tiles[ j ];
634 7614 : if( !strcmp( tile->name, *p ) ) {
635 21 : auto_tile_cpu( tile, cpus, &cpu_idx_startup, cpu_assigned_startup, cpu_ordering, skip_ht_pairs );
636 21 : }
637 7614 : }
638 234 : }
639 234 : for( char const ** p = POST_START; *p; p++ ) {
640 6540 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
641 6345 : fd_topo_tile_t * tile = &topo->tiles[ j ];
642 6345 : if( !strcmp( tile->name, *p ) ) {
643 276 : auto_tile_cpu( tile, cpus, &cpu_idx, cpu_assigned, cpu_ordering, skip_ht_pairs );
644 276 : }
645 6345 : }
646 195 : }
647 :
648 : /* Make sure all the tiles we haven't set are supposed to be floating. */
649 1308 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
650 1269 : fd_topo_tile_t * tile = &topo->tiles[ i ];
651 1269 : if( tile->cpu_idx!=ULONG_MAX ) continue;
652 :
653 138 : int found = 0;
654 360 : for( char const ** p = FLOATING; *p; p++ ) {
655 360 : if( !strcmp( tile->name, *p ) ) {
656 138 : found = 1;
657 138 : break;
658 138 : }
659 360 : }
660 :
661 138 : if( FD_UNLIKELY( !found ) ) FD_LOG_WARNING(( "auto layout cannot affine tile `%s:%lu` because it is unknown. Leaving it floating", tile->name, tile->kind_id ));
662 138 : }
663 :
664 39 : topo->agave_affinity_cnt = 0UL;
665 39 : if( FD_UNLIKELY( reserve_agave_cores ) ) {
666 1386 : for( ulong i=cpu_idx; i<cpus->cpu_cnt; i++ ) {
667 1368 : if( FD_UNLIKELY( !cpus->cpu[ cpu_ordering[ i ] ].online ) ) continue;
668 1368 : if( FD_UNLIKELY( cpu_bv_test( cpu_assigned, cpu_ordering[ i ] ) ) ) continue;
669 :
670 1332 : if( FD_LIKELY( topo->agave_affinity_cnt<sizeof(topo->agave_affinity_cpu_idx)/sizeof(topo->agave_affinity_cpu_idx[0]) ) ) {
671 1332 : topo->agave_affinity_cpu_idx[ topo->agave_affinity_cnt++ ] = cpu_ordering[ i ];
672 1332 : }
673 1332 : }
674 18 : }
675 39 : }
676 :
677 : void
678 : fd_topob_auto_layout( fd_topo_t * topo,
679 0 : int reserve_agave_cores ) {
680 0 : fd_topo_cpus_t cpus[1];
681 0 : fd_topo_cpus_init( cpus );
682 0 : fd_topob_auto_layout_cpus( topo, cpus, reserve_agave_cores );
683 0 : }
684 :
685 : ulong
686 : fd_numa_node_idx( ulong cpu_idx );
687 :
688 : static void
689 0 : initialize_numa_assignments( fd_topo_t * topo ) {
690 : /* Assign workspaces to NUMA nodes. The heuristic here is pretty
691 : simple for now: workspaces go on the NUMA node of the first
692 : tile which maps the largest object in the workspace. */
693 :
694 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
695 0 : ulong max_footprint = 0UL;
696 0 : ulong max_obj = ULONG_MAX;
697 :
698 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
699 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
700 0 : if( obj->wksp_id!=i ) continue;
701 0 : if( FD_UNLIKELY( !obj->footprint ) ) FD_LOG_ERR(( "obj %lu (%s) has invalid parameters", j, obj->name ));
702 :
703 0 : if( FD_UNLIKELY( !max_footprint || obj->footprint>max_footprint ) ) {
704 0 : max_footprint = obj->footprint;
705 0 : max_obj = j;
706 0 : }
707 0 : }
708 :
709 0 : if( FD_UNLIKELY( max_obj==ULONG_MAX ) ) FD_LOG_ERR(( "no object found for workspace %s", topo->workspaces[ i ].name ));
710 :
711 0 : int found_strict = 0;
712 0 : int found_lazy = 0;
713 0 : int found_assigned = 0;
714 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
715 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
716 0 : if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
717 0 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
718 0 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
719 0 : found_strict = 1;
720 0 : found_lazy = 1;
721 0 : found_assigned = 1;
722 0 : break;
723 0 : } else if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx>=FD_TILE_MAX ) ) {
724 0 : topo->workspaces[ i ].numa_idx = 0;
725 0 : found_lazy = 1;
726 0 : break;
727 0 : }
728 0 : }
729 :
730 0 : if( FD_LIKELY( !found_strict ) ) {
731 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
732 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
733 0 : for( ulong k=0UL; k<tile->uses_obj_cnt; k++ ) {
734 0 : if( FD_LIKELY( tile->uses_obj_id[ k ]==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
735 0 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
736 0 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
737 0 : found_lazy = 1;
738 0 : found_assigned = 1;
739 0 : break;
740 0 : } else if( FD_UNLIKELY( tile->uses_obj_id[ k ]==max_obj ) && tile->cpu_idx>=FD_TILE_MAX ) {
741 0 : topo->workspaces[ i ].numa_idx = 0;
742 0 : found_lazy = 1;
743 : /* Don't break, keep looking -- a tile with a CPU assignment
744 : might also use object in which case we want to use that
745 : NUMA node. */
746 0 : }
747 0 : }
748 :
749 0 : if( FD_UNLIKELY( found_assigned ) ) break;
750 0 : }
751 0 : }
752 :
753 0 : if( FD_UNLIKELY( !found_lazy ) ) FD_LOG_ERR(( "no tile uses object %s for workspace %s", topo->objs[ max_obj ].name, topo->workspaces[ i ].name ));
754 0 : }
755 0 : }
756 :
757 : void
758 : fd_topob_finish( fd_topo_t * topo,
759 0 : fd_topo_obj_callbacks_t ** callbacks ) {
760 0 : for( ulong z=0UL; z<topo->tile_cnt; z++ ) {
761 0 : fd_topo_tile_t * tile = &topo->tiles[ z ];
762 :
763 0 : ulong in_cnt = 0UL;
764 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
765 0 : if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
766 0 : in_cnt++;
767 0 : }
768 :
769 0 : FD_TEST( !fd_pod_replacef_ulong( topo->props, in_cnt, "obj.%lu.in_cnt", tile->metrics_obj_id ) );
770 0 : }
771 :
772 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
773 0 : fd_topo_wksp_t * wksp = &topo->workspaces[ i ];
774 :
775 0 : ulong loose_sz = 0UL;
776 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
777 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
778 0 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
779 :
780 0 : fd_topo_obj_callbacks_t * cb = NULL;
781 0 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
782 0 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
783 0 : cb = callbacks[ i ];
784 0 : break;
785 0 : }
786 0 : }
787 0 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
788 :
789 0 : if( FD_UNLIKELY( cb->loose ) ) loose_sz += cb->loose( topo, obj );
790 0 : }
791 :
792 0 : ulong part_max = wksp->part_max;
793 0 : if( !part_max ) part_max = (loose_sz / (64UL << 10)); /* alloc + residual padding */
794 0 : part_max += 3; /* for initial alignment */
795 0 : ulong offset = fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
796 :
797 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
798 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
799 0 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
800 :
801 0 : fd_topo_obj_callbacks_t * cb = NULL;
802 0 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
803 0 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
804 0 : cb = callbacks[ i ];
805 0 : break;
806 0 : }
807 0 : }
808 0 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
809 :
810 0 : ulong align_ = cb->align( topo, obj );
811 0 : if( FD_UNLIKELY( !fd_ulong_is_pow2( align_ ) ) ) FD_LOG_ERR(( "Return value of fdctl_obj_align(%s,%lu) is not a power of 2", obj->name, obj->id ));
812 0 : offset = fd_ulong_align_up( offset, align_ );
813 0 : obj->offset = offset;
814 0 : obj->footprint = cb->footprint( topo, obj );
815 0 : if( FD_UNLIKELY( 0!=strcmp( obj->name, "tile" ) && (!obj->footprint || obj->footprint>LONG_MAX) ) ) {
816 0 : FD_LOG_ERR(( "fdctl_obj_footprint(%s,%lu) failed", obj->name, obj->id ));
817 0 : }
818 0 : offset += obj->footprint;
819 0 : }
820 :
821 0 : ulong footprint = fd_ulong_align_up( offset, fd_topo_workspace_align() );
822 :
823 0 : part_max = fd_ulong_max( part_max, wksp->min_part_max );
824 0 : loose_sz = fd_ulong_max( loose_sz, wksp->min_loose_sz );
825 :
826 : /* Compute footprint for a workspace that can store our footprint,
827 : with an extra align of padding incase gaddr_lo is not aligned. */
828 0 : ulong total_wksp_footprint = fd_wksp_footprint( part_max, footprint + fd_topo_workspace_align() + loose_sz );
829 :
830 0 : ulong page_sz = topo->max_page_size;
831 0 : if( total_wksp_footprint < topo->gigantic_page_threshold ) page_sz = FD_SHMEM_HUGE_PAGE_SZ;
832 0 : if( FD_UNLIKELY( page_sz!=FD_SHMEM_HUGE_PAGE_SZ && page_sz!=FD_SHMEM_GIGANTIC_PAGE_SZ ) ) FD_LOG_ERR(( "invalid page_sz" ));
833 :
834 0 : ulong wksp_aligned_footprint = fd_ulong_align_up( total_wksp_footprint, page_sz );
835 :
836 : /* Give any leftover space in the underlying shared memory to the
837 : data region of the workspace, since we might as well use it. */
838 0 : wksp->part_max = part_max;
839 0 : wksp->known_footprint = footprint;
840 0 : wksp->total_footprint = wksp_aligned_footprint - fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
841 0 : wksp->page_sz = page_sz;
842 0 : wksp->page_cnt = wksp_aligned_footprint / page_sz;
843 0 : }
844 :
845 0 : initialize_numa_assignments( topo );
846 :
847 0 : validate( topo );
848 0 : }
|