Line data Source code
1 : #include "fd_topob.h"
2 :
3 : #include "../../util/pod/fd_pod_format.h"
4 : #include "fd_cpu_topo.h"
5 :
6 : fd_topo_t *
7 : fd_topob_new( void * mem,
8 3 : char const * app_name ) {
9 3 : fd_topo_t * topo = (fd_topo_t *)mem;
10 :
11 3 : if( FD_UNLIKELY( !topo ) ) {
12 0 : FD_LOG_WARNING( ( "NULL topo" ) );
13 0 : return NULL;
14 0 : }
15 :
16 3 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)topo, alignof(fd_topo_t) ) ) ) {
17 0 : FD_LOG_WARNING( ( "misaligned topo" ) );
18 0 : return NULL;
19 0 : }
20 :
21 3 : fd_memset( topo, 0, sizeof(fd_topo_t) );
22 :
23 3 : FD_TEST( fd_pod_new( topo->props, sizeof(topo->props) ) );
24 :
25 3 : if( FD_UNLIKELY( strlen( app_name )>=sizeof(topo->app_name) ) ) FD_LOG_ERR(( "app_name too long: %s", app_name ));
26 3 : strncpy( topo->app_name, app_name, sizeof(topo->app_name) );
27 :
28 3 : topo->max_page_size = FD_SHMEM_GIGANTIC_PAGE_SZ;
29 3 : topo->gigantic_page_threshold = 4 * FD_SHMEM_HUGE_PAGE_SZ;
30 :
31 3 : topo->agave_affinity_cnt = 0;
32 3 : topo->blocklist_cores_cnt = 0;
33 :
34 3 : return topo;
35 3 : }
36 :
37 : fd_topo_wksp_t *
38 : fd_topob_wksp( fd_topo_t * topo,
39 6 : char const * name ) {
40 6 : if( FD_UNLIKELY( !topo || !name || !strlen( name ) ) ) FD_LOG_ERR(( "NULL args" ));
41 6 : if( FD_UNLIKELY( strlen( name )>=sizeof(topo->workspaces[ topo->wksp_cnt ].name ) ) ) FD_LOG_ERR(( "wksp name too long: %s", name ));
42 6 : if( FD_UNLIKELY( topo->wksp_cnt>=FD_TOPO_MAX_WKSPS ) ) FD_LOG_ERR(( "too many workspaces" ));
43 :
44 6 : fd_topo_wksp_t * wksp = &topo->workspaces[ topo->wksp_cnt ];
45 6 : strncpy( wksp->name, name, sizeof(wksp->name) );
46 6 : wksp->id = topo->wksp_cnt;
47 6 : wksp->core_dump_level = FD_TOPO_CORE_DUMP_LEVEL_REGULAR;
48 6 : topo->wksp_cnt++;
49 6 : return wksp;
50 6 : }
51 :
52 : fd_topo_obj_t *
53 : fd_topob_obj( fd_topo_t * topo,
54 : char const * obj_name,
55 66 : char const * wksp_name ) {
56 66 : if( FD_UNLIKELY( !topo || !obj_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
57 66 : if( FD_UNLIKELY( strlen( obj_name )>=sizeof(topo->objs[ topo->obj_cnt ].name ) ) ) FD_LOG_ERR(( "obj name too long: %s", obj_name ));
58 66 : if( FD_UNLIKELY( topo->obj_cnt>=FD_TOPO_MAX_OBJS ) ) FD_LOG_ERR(( "too many objects" ));
59 :
60 66 : ulong wksp_id = fd_topo_find_wksp( topo, wksp_name );
61 66 : if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "workspace not found: %s", wksp_name ));
62 :
63 66 : fd_topo_obj_t * obj = &topo->objs[ topo->obj_cnt ];
64 66 : memset( obj, 0, sizeof(fd_topo_obj_t) );
65 66 : strncpy( obj->name, obj_name, sizeof(obj->name) );
66 66 : obj->id = topo->obj_cnt;
67 66 : obj->wksp_id = wksp_id;
68 66 : obj->label_idx = ULONG_MAX;
69 66 : topo->obj_cnt++;
70 :
71 66 : return obj;
72 66 : }
73 :
74 : fd_topo_obj_t *
75 : fd_topob_obj_named( fd_topo_t * topo,
76 : char const * obj_type,
77 : char const * wksp_name,
78 0 : char const * label ) {
79 0 : if( FD_UNLIKELY( !label ) ) FD_LOG_ERR(( "NULL args" ));
80 0 : if( FD_UNLIKELY( strlen( label )>=sizeof(topo->objs[ topo->obj_cnt ].label ) ) ) FD_LOG_ERR(( "obj label too long: %s", label ));
81 0 : fd_topo_obj_t * obj = fd_topob_obj( topo, obj_type, wksp_name );
82 0 : if( FD_UNLIKELY( !obj ) ) return NULL;
83 :
84 0 : fd_cstr_ncpy( obj->label, label, sizeof(obj->label) );
85 0 : obj->label_idx = fd_topo_obj_cnt( topo, obj_type, label );
86 :
87 0 : return obj;
88 0 : }
89 :
90 : fd_topo_link_t *
91 : fd_topob_link( fd_topo_t * topo,
92 : char const * link_name,
93 : char const * wksp_name,
94 : ulong depth,
95 : ulong mtu,
96 18 : ulong burst ) {
97 18 : if( FD_UNLIKELY( !topo || !link_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
98 18 : if( FD_UNLIKELY( strlen( link_name )>=sizeof(topo->links[ topo->link_cnt ].name ) ) ) FD_LOG_ERR(( "link name too long: %s", link_name ));
99 18 : if( FD_UNLIKELY( topo->link_cnt>=FD_TOPO_MAX_LINKS ) ) FD_LOG_ERR(( "too many links" ));
100 :
101 18 : ulong kind_id = 0UL;
102 39 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
103 21 : if( !strcmp( topo->links[ i ].name, link_name ) ) kind_id++;
104 21 : }
105 :
106 18 : fd_topo_link_t * link = &topo->links[ topo->link_cnt ];
107 18 : strncpy( link->name, link_name, sizeof(link->name) );
108 18 : link->id = topo->link_cnt;
109 18 : link->kind_id = kind_id;
110 18 : link->depth = depth;
111 18 : link->mtu = mtu;
112 18 : link->burst = burst;
113 :
114 18 : fd_topo_obj_t * obj = fd_topob_obj( topo, "mcache", wksp_name );
115 18 : link->mcache_obj_id = obj->id;
116 18 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
117 :
118 18 : if( mtu ) {
119 6 : obj = fd_topob_obj( topo, "dcache", wksp_name );
120 6 : link->dcache_obj_id = obj->id;
121 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
122 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, burst, "obj.%lu.burst", obj->id ) );
123 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, mtu, "obj.%lu.mtu", obj->id ) );
124 6 : }
125 18 : topo->link_cnt++;
126 :
127 18 : return link;
128 18 : }
129 :
130 : void
131 : fd_topob_tile_uses( fd_topo_t * topo,
132 : fd_topo_tile_t * tile,
133 : fd_topo_obj_t const * obj,
134 51 : int mode ) {
135 51 : (void)topo;
136 :
137 51 : if( FD_UNLIKELY( tile->uses_obj_cnt>=FD_TOPO_MAX_TILE_OBJS ) ) FD_LOG_ERR(( "tile `%s` uses too many objects", tile->name ));
138 :
139 51 : tile->uses_obj_id[ tile->uses_obj_cnt ] = obj->id;
140 51 : tile->uses_obj_mode[ tile->uses_obj_cnt ] = mode;
141 51 : tile->uses_obj_cnt++;
142 51 : }
143 :
144 : fd_topo_tile_t *
145 : fd_topob_tile( fd_topo_t * topo,
146 : char const * tile_name,
147 : char const * tile_wksp,
148 : char const * metrics_wksp,
149 : ulong cpu_idx,
150 : int is_agave,
151 : int uses_id_keyswitch,
152 6 : int uses_av_keyswitch ) {
153 :
154 6 : if( FD_UNLIKELY( !topo || !tile_name || !tile_wksp || !metrics_wksp ) ) FD_LOG_ERR(( "NULL args" ));
155 6 : if( FD_UNLIKELY( strlen( tile_name )>=sizeof(topo->tiles[ topo->tile_cnt ].name ) ) ) FD_LOG_ERR(( "tile name too long: %s", tile_name ));
156 6 : if( FD_UNLIKELY( topo->tile_cnt>=FD_TOPO_MAX_TILES ) ) FD_LOG_ERR(( "too many tiles %lu", topo->tile_cnt ));
157 :
158 6 : ulong kind_id = 0UL;
159 6 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
160 0 : if( !strcmp( topo->tiles[ i ].name, tile_name ) ) kind_id++;
161 0 : }
162 :
163 6 : fd_topo_tile_t * tile = &topo->tiles[ topo->tile_cnt ];
164 6 : strncpy( tile->name, tile_name, sizeof(tile->name) );
165 6 : tile->id = topo->tile_cnt;
166 6 : tile->kind_id = kind_id;
167 6 : tile->is_agave = is_agave;
168 6 : tile->cpu_idx = cpu_idx;
169 6 : tile->in_cnt = 0UL;
170 6 : tile->out_cnt = 0UL;
171 6 : tile->uses_obj_cnt = 0UL;
172 :
173 6 : fd_topo_obj_t * tile_obj = fd_topob_obj( topo, "tile", tile_wksp );
174 6 : tile->tile_obj_id = tile_obj->id;
175 6 : fd_topob_tile_uses( topo, tile, tile_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
176 :
177 6 : fd_topo_obj_t * obj = fd_topob_obj( topo, "metrics", metrics_wksp );
178 6 : tile->metrics_obj_id = obj->id;
179 6 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
180 :
181 6 : if( FD_LIKELY( uses_id_keyswitch ) ) {
182 0 : obj = fd_topob_obj( topo, "keyswitch", tile_wksp );
183 0 : tile->id_keyswitch_obj_id = obj->id;
184 0 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
185 6 : } else {
186 6 : tile->id_keyswitch_obj_id = ULONG_MAX;
187 6 : }
188 :
189 6 : if( FD_UNLIKELY( uses_av_keyswitch ) ) {
190 0 : obj = fd_topob_obj( topo, "keyswitch", tile_wksp );
191 0 : tile->av_keyswitch_obj_id = obj->id;
192 0 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
193 6 : } else {
194 6 : tile->av_keyswitch_obj_id = ULONG_MAX;
195 6 : }
196 :
197 6 : topo->tile_cnt++;
198 6 : return tile;
199 6 : }
200 :
201 : void
202 : fd_topob_tile_in( fd_topo_t * topo,
203 : char const * tile_name,
204 : ulong tile_kind_id,
205 : char const * fseq_wksp,
206 : char const * link_name,
207 : ulong link_kind_id,
208 : int reliable,
209 15 : int polled ) {
210 15 : if( FD_UNLIKELY( !topo || !tile_name || !fseq_wksp || !link_name ) ) FD_LOG_ERR(( "NULL args" ));
211 :
212 15 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
213 15 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
214 15 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
215 :
216 15 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
217 15 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
218 15 : fd_topo_link_t * link = &topo->links[ link_id ];
219 :
220 15 : if( FD_UNLIKELY( tile->in_cnt>=FD_TOPO_MAX_TILE_IN_LINKS ) ) FD_LOG_ERR(( "too many in links: %s:%lu", tile_name, tile_kind_id ) );
221 15 : tile->in_link_id[ tile->in_cnt ] = link->id;
222 15 : tile->in_link_reliable[ tile->in_cnt ] = reliable;
223 15 : tile->in_link_poll[ tile->in_cnt ] = polled;
224 15 : fd_topo_obj_t * obj = fd_topob_obj( topo, "fseq", fseq_wksp );
225 15 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
226 15 : tile->in_link_fseq_obj_id[ tile->in_cnt ] = obj->id;
227 15 : tile->in_cnt++;
228 :
229 15 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
230 15 : if( FD_LIKELY( link->mtu ) ) {
231 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
232 3 : }
233 15 : }
234 :
235 : void
236 : fd_topob_tile_out( fd_topo_t * topo,
237 : char const * tile_name,
238 : ulong tile_kind_id,
239 : char const * link_name,
240 3 : ulong link_kind_id ) {
241 3 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
242 3 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
243 3 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
244 :
245 3 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
246 3 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
247 3 : fd_topo_link_t * link = &topo->links[ link_id ];
248 :
249 3 : if( FD_UNLIKELY( tile->out_cnt>=FD_TOPO_MAX_TILE_OUT_LINKS ) ) FD_LOG_ERR(( "too many out links: %s", tile_name ));
250 3 : tile->out_link_id[ tile->out_cnt ] = link->id;
251 3 : tile->out_cnt++;
252 :
253 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
254 3 : if( FD_LIKELY( link->mtu ) ) {
255 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
256 3 : }
257 3 : }
258 :
259 : static void
260 0 : validate( fd_topo_t const * topo ) {
261 : /* Objects have valid wksp_ids */
262 0 : for( ulong i=0UL; i<topo->obj_cnt; i++ ) {
263 0 : if( FD_UNLIKELY( topo->objs[ i ].wksp_id>=topo->wksp_cnt ) )
264 0 : FD_LOG_ERR(( "invalid workspace id %lu", topo->objs[ i ].wksp_id ));
265 0 : }
266 :
267 : /* Tile ins are valid */
268 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
269 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
270 0 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ]>=topo->link_cnt ) )
271 0 : FD_LOG_ERR(( "tile %lu (%s) has invalid in link %lu", i, topo->tiles[ i ].name, topo->tiles[ i ].in_link_id[ j ] ));
272 0 : }
273 0 : }
274 :
275 : /* Tile does not have duplicated ins */
276 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
277 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
278 0 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
279 0 : if( FD_UNLIKELY( j==k ) ) continue;
280 0 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
281 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated in link %lu (%s)", i, topo->tiles[ i ].name,
282 0 : topo->tiles[ i ].in_link_id[ j ], topo->links[ topo->tiles[ i ].in_link_id[ j ] ].name ));
283 0 : }
284 0 : }
285 0 : }
286 :
287 : /* Tile does not have duplicated outs */
288 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
289 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
290 0 : for( ulong k=0UL; k<topo->tiles[ i ].out_cnt; k++ ) {
291 0 : if( FD_UNLIKELY( j==k ) ) continue;
292 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].out_link_id[ k ] ) )
293 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated out link %lu (%s)", i, topo->tiles[ i ].name,
294 0 : topo->tiles[ i ].out_link_id[ j ], topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name ));
295 0 : }
296 0 : }
297 0 : }
298 :
299 : /* Tile outs are different than ins */
300 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
301 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
302 0 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
303 0 : char const * link_name = topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name;
304 : /* PoH tile "publishes" this on behalf of Agave, so it's not
305 : a real circular link. */
306 0 : if( FD_UNLIKELY( !strcmp( link_name, "stake_out" ) ||
307 0 : !strcmp( link_name, "crds_shred" ) ) ) continue;
308 :
309 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
310 0 : FD_LOG_ERR(( "tile %lu has out link %lu same as in", i, topo->tiles[ i ].out_link_id[ j ] ));
311 0 : }
312 0 : }
313 0 : }
314 :
315 : /* Non polling tile ins are also not reliable */
316 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
317 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
318 0 : if( FD_UNLIKELY( !topo->tiles[ i ].in_link_poll[ j ] && topo->tiles[ i ].in_link_reliable[ j ] ) )
319 0 : FD_LOG_ERR(( "tile %lu has in link %lu which is not polled but reliable", i, topo->tiles[ i ].in_link_id[ j ] ));
320 0 : }
321 0 : }
322 :
323 : /* Tile outs are valid */
324 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
325 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
326 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] >= topo->link_cnt ) )
327 0 : FD_LOG_ERR(( "tile %lu has invalid out link %lu", i, topo->tiles[ i ].out_link_id[ j ] ));
328 0 : }
329 0 : }
330 :
331 : /* Workspace names are unique */
332 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
333 0 : for( ulong j=0UL; j<topo->wksp_cnt; j++ ) {
334 0 : if( FD_UNLIKELY( i==j ) ) continue;
335 0 : if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, topo->workspaces[ j ].name ) ) )
336 0 : FD_LOG_ERR(( "duplicate workspace name %s", topo->workspaces[ i ].name ));
337 0 : }
338 0 : }
339 :
340 : /* Each workspace is identified correctly */
341 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
342 0 : if( FD_UNLIKELY( topo->workspaces[ i ].id != i ) )
343 0 : FD_LOG_ERR(( "workspace %lu has id %lu", i, topo->workspaces[ i ].id ));
344 0 : }
345 :
346 : /* Each link has exactly one producer */
347 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
348 0 : ulong producer_cnt = 0;
349 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
350 0 : for( ulong k=0UL; k<topo->tiles[ j ].out_cnt; k++ ) {
351 0 : if( topo->tiles[ j ].out_link_id[ k ]==i ) producer_cnt++;
352 0 : }
353 0 : }
354 0 : if( FD_UNLIKELY( producer_cnt>1UL || ( producer_cnt==0UL && !topo->links[ i ].permit_no_producers ) ) )
355 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has %lu producers", i, topo->links[ i ].name, topo->links[ i ].kind_id, producer_cnt ));
356 0 : }
357 :
358 : /* Each link has at least one consumer */
359 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
360 0 : ulong cnt = fd_topo_link_consumer_cnt( topo, &topo->links[ i ] );
361 0 : if( FD_UNLIKELY( cnt < 1UL && !topo->links[ i ].permit_no_consumers ) ) {
362 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has 0 consumers", i, topo->links[ i ].name, topo->links[ i ].kind_id ));
363 0 : }
364 0 : }
365 0 : }
366 :
367 : void
368 : fd_topob_auto_layout_cpus( fd_topo_t * topo,
369 : fd_topo_cpus_t * cpus,
370 39 : int reserve_agave_cores ) {
371 : /* Incredibly simple automatic layout system for now ... just assign
372 : tiles to CPU cores in NUMA sequential order, except for a few tiles
373 : which should be floating. */
374 :
375 39 : char const * FLOATING[] = {
376 39 : "netlnk",
377 39 : "metric",
378 39 : "diag",
379 39 : "bencho",
380 39 : "genesi", /* FIREDANCER ONLY */
381 39 : "ipecho", /* FIREDANCER ONLY */
382 39 : "snapwr", /* FIREDANCER ONLY */
383 39 : };
384 :
385 39 : char const * ORDERED[] = {
386 39 : "backt",
387 39 : "benchg",
388 39 : "benchs",
389 39 : "net",
390 39 : "sock",
391 39 : "quic",
392 39 : "bundle",
393 39 : "verify",
394 39 : "dedup",
395 39 : "resolh", /* FRANK only */
396 39 : "resolv", /* FIREDANCER only */
397 39 : "pack",
398 39 : "bank", /* FRANK only */
399 39 : "execle", /* FIREDANCER only */
400 39 : "poh", /* FRANK only */
401 39 : "pohh", /* FIREDANCER only */
402 39 : "shred",
403 39 : "event", /* FIREADNCER only */
404 39 : "store", /* FRANK only */
405 39 : "sign",
406 39 : "plugin", /* FRANK only */
407 39 : "gui",
408 39 : "rpc", /* FIREDANCER only */
409 39 : "gossvf", /* FIREDANCER only */
410 39 : "gossip", /* FIREDANCER only */
411 39 : "repair", /* FIREDANCER only */
412 39 : "replay", /* FIREDANCER only */
413 39 : "execrp", /* FIREDANCER only */
414 39 : "txsend", /* FIREDANCER only */
415 39 : "tower", /* FIREDANCER only */
416 39 : "pktgen",
417 39 : "snapct", /* FIREDANCER only */
418 39 : "snapld", /* FIREDANCER only */
419 39 : "snapdc", /* FIREDANCER only */
420 39 : "snapin", /* FIREDANCER only */
421 39 : "snapwm", /* FIREDANCER only */
422 39 : "snapwh", /* FIREDANCER only */
423 39 : "snapla", /* FIREDANCER only */
424 39 : "snapls", /* FIREDANCER only */
425 39 : "snaplh", /* FIREDANCER only */
426 39 : "snaplv", /* FIREDANCER only */
427 39 : "arch_f", /* FIREDANCER only */
428 39 : "arch_w", /* FIREDANCER only */
429 39 : "accdb", /* FIREDANCER only */
430 39 : };
431 :
432 39 : char const * CRITICAL_TILES[] = {
433 39 : "pack",
434 39 : "poh",
435 39 : "pohh",
436 39 : "gui",
437 39 : "snapld", /* TODO: Snapshot loading speed depends on having full core */
438 39 : "snapdc", /* TODO: Snapshot loading speed depends on having full core */
439 39 : "snapin", /* TODO: Snapshot loading speed depends on having full core */
440 39 : "snapwm", /* TODO: Snapshot loading speed depends on having full core */
441 39 : "snapwh", /* TODO: Snapshot loading speed depends on having full core */
442 39 : };
443 :
444 1308 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
445 1269 : fd_topo_tile_t * tile = &topo->tiles[ i ];
446 1269 : tile->cpu_idx = ULONG_MAX;
447 1269 : }
448 :
449 39 : ulong cpu_ordering[ FD_TILE_MAX ] = { 0UL };
450 39 : int pairs_assigned[ FD_TILE_MAX ] = { 0 };
451 :
452 39 : ulong next_cpu_idx = 0UL;
453 78 : for( ulong i=0UL; i<cpus->numa_node_cnt; i++ ) {
454 4167 : for( ulong j=0UL; j<cpus->cpu_cnt; j++ ) {
455 4128 : fd_topo_cpu_t * cpu = &cpus->cpu[ j ];
456 :
457 4128 : if( FD_UNLIKELY( pairs_assigned[ j ] || cpu->numa_node!=i ) ) continue;
458 :
459 2064 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
460 2064 : cpu_ordering[ next_cpu_idx++ ] = j;
461 :
462 2064 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
463 : /* If the CPU has a HT pair, place it immediately after so they
464 : are sequentially assigned. */
465 2064 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
466 2064 : cpu_ordering[ next_cpu_idx++ ] = cpu->sibling;
467 2064 : pairs_assigned[ cpu->sibling ] = 1;
468 2064 : }
469 2064 : }
470 39 : }
471 :
472 39 : FD_TEST( next_cpu_idx==cpus->cpu_cnt );
473 :
474 39 : int cpu_assigned[ FD_TILE_MAX ] = {0};
475 : /* excluded cpus are simply considered already assigned */
476 123 : for( ulong i=0UL; i<topo->blocklist_cores_cnt; i++ ) {
477 84 : FD_TEST( topo->blocklist_cores_cpu_idx[ i ]<FD_TILE_MAX );
478 84 : cpu_assigned[ topo->blocklist_cores_cpu_idx[ i ] ] = 1;
479 84 : }
480 :
481 : /* Compute total number of available physical cores */
482 39 : ulong available_physical = 0UL;
483 4167 : for( ulong i=0UL; i<cpus->cpu_cnt; i++ ) {
484 4128 : if( !cpu_assigned[ i ] && !pairs_assigned[ i ] ) available_physical++;
485 4128 : }
486 :
487 : /* Compute total number of tiles that need assignment */
488 39 : ulong tiles_to_assign = 0UL;
489 1308 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
490 25527 : for( ulong i=0UL; i<sizeof(ORDERED)/sizeof(ORDERED[0]); i++ ) {
491 25368 : if( !strcmp( topo->tiles[ j ].name, ORDERED[ i ] ) ) {
492 1110 : tiles_to_assign++;
493 1110 : break;
494 1110 : }
495 25368 : }
496 1269 : }
497 :
498 : /* If we have enough physical cores (excluding HT siblings) for all
499 : tiles that need assignment, exclude HT siblings so that no tile
500 : gets scheduled on a hyperthread pair.
501 : For Frankendancer, we reserve 2x cores so we have enough for Agave */
502 39 : int skip_ht_pairs = reserve_agave_cores
503 39 : ? (available_physical>=2*tiles_to_assign) /* Frankendancer */
504 39 : : (available_physical>=tiles_to_assign); /* Firedancer */
505 :
506 39 : ulong cpu_idx = 0UL;
507 :
508 1755 : for( ulong i=0UL; i<sizeof(ORDERED)/sizeof(ORDERED[0]); i++ ) {
509 57552 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
510 55836 : fd_topo_tile_t * tile = &topo->tiles[ j ];
511 55836 : if( !strcmp( tile->name, ORDERED[ i ] ) ) {
512 2880 : while( cpu_idx<FD_TILE_MAX && cpu_assigned[ cpu_ordering[ cpu_idx ] ] ) cpu_idx++;
513 1110 : if( FD_UNLIKELY( cpu_idx>=cpus->cpu_cnt ) ) {
514 0 : FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned", tile->name, tile->kind_id ));
515 1110 : } else {
516 : /* Certain tiles are latency and throughput critical and
517 : should not get a HT pair assigned. */
518 1110 : fd_topo_cpu_t const * cpu = &cpus->cpu[ cpu_ordering[ cpu_idx ] ];
519 :
520 1110 : int is_ht_critical = 0;
521 1110 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
522 10221 : for( ulong k=0UL; k<sizeof(CRITICAL_TILES)/sizeof(CRITICAL_TILES[0]); k++ ) {
523 9228 : if( !strcmp( tile->name, CRITICAL_TILES[ k ] ) ) {
524 117 : is_ht_critical = 1;
525 117 : break;
526 117 : }
527 9228 : }
528 1110 : }
529 :
530 1110 : if( FD_UNLIKELY( is_ht_critical || skip_ht_pairs ) ) {
531 645 : ulong try_assign = cpu_idx;
532 669 : while( cpu_assigned[ cpu_ordering[ try_assign ] ] || (cpus->cpu[ cpu_ordering[ try_assign ] ].sibling!=ULONG_MAX && cpu_assigned[ cpus->cpu[ cpu_ordering[ try_assign ] ].sibling ]) ) {
533 24 : try_assign++;
534 24 : if( FD_UNLIKELY( try_assign>=cpus->cpu_cnt ) ) FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned or have a HT pair assigned", tile->name, tile->kind_id ));
535 24 : }
536 :
537 645 : ulong sibling = cpus->cpu[ cpu_ordering[ try_assign ] ].sibling;
538 645 : cpu_assigned[ cpu_ordering[ try_assign ] ] = 1;
539 645 : if( sibling!=ULONG_MAX ) {
540 645 : cpu_assigned[ sibling ] = 1;
541 645 : }
542 645 : tile->cpu_idx = cpu_ordering[ try_assign ];
543 645 : } else {
544 465 : cpu_assigned[ cpu_ordering[ cpu_idx ] ] = 1;
545 465 : tile->cpu_idx = cpu_ordering[ cpu_idx ];
546 465 : }
547 1110 : }
548 1110 : }
549 55836 : }
550 1716 : }
551 :
552 : /* Make sure all the tiles we haven't set are supposed to be floating. */
553 1308 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
554 1269 : fd_topo_tile_t * tile = &topo->tiles[ i ];
555 1269 : if( tile->cpu_idx!=ULONG_MAX ) continue;
556 :
557 159 : int found = 0;
558 465 : for( ulong j=0UL; j<sizeof(FLOATING)/sizeof(FLOATING[0]); j++ ) {
559 465 : if( !strcmp( tile->name, FLOATING[ j ] ) ) {
560 159 : found = 1;
561 159 : break;
562 159 : }
563 465 : }
564 :
565 159 : if( FD_UNLIKELY( !found ) ) FD_LOG_WARNING(( "auto layout cannot affine tile `%s:%lu` because it is unknown. Leaving it floating", tile->name, tile->kind_id ));
566 159 : }
567 :
568 39 : if( FD_UNLIKELY( reserve_agave_cores ) ) {
569 1386 : for( ulong i=cpu_idx; i<cpus->cpu_cnt; i++ ) {
570 1368 : if( FD_UNLIKELY( !cpus->cpu[ cpu_ordering[ i ] ].online ) ) continue;
571 1368 : if( FD_UNLIKELY( cpu_assigned[ cpu_ordering[ i ] ] ) ) continue;
572 :
573 1332 : if( FD_LIKELY( topo->agave_affinity_cnt<sizeof(topo->agave_affinity_cpu_idx)/sizeof(topo->agave_affinity_cpu_idx[0]) ) ) {
574 1332 : topo->agave_affinity_cpu_idx[ topo->agave_affinity_cnt++ ] = cpu_ordering[ i ];
575 1332 : }
576 1332 : }
577 18 : }
578 39 : }
579 :
580 : void
581 : fd_topob_auto_layout( fd_topo_t * topo,
582 0 : int reserve_agave_cores ) {
583 0 : fd_topo_cpus_t cpus[1];
584 0 : fd_topo_cpus_init( cpus );
585 0 : fd_topob_auto_layout_cpus( topo, cpus, reserve_agave_cores );
586 0 : }
587 :
588 : ulong
589 : fd_numa_node_idx( ulong cpu_idx );
590 :
591 : static void
592 0 : initialize_numa_assignments( fd_topo_t * topo ) {
593 : /* Assign workspaces to NUMA nodes. The heuristic here is pretty
594 : simple for now: workspaces go on the NUMA node of the first
595 : tile which maps the largest object in the workspace. */
596 :
597 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
598 0 : ulong max_footprint = 0UL;
599 0 : ulong max_obj = ULONG_MAX;
600 :
601 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
602 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
603 0 : if( obj->wksp_id!=i ) continue;
604 0 : if( FD_UNLIKELY( !obj->footprint ) ) FD_LOG_ERR(( "obj %lu (%s) has invalid parameters", j, obj->name ));
605 :
606 0 : if( FD_UNLIKELY( !max_footprint || obj->footprint>max_footprint ) ) {
607 0 : max_footprint = obj->footprint;
608 0 : max_obj = j;
609 0 : }
610 0 : }
611 :
612 0 : if( FD_UNLIKELY( max_obj==ULONG_MAX ) ) FD_LOG_ERR(( "no object found for workspace %s", topo->workspaces[ i ].name ));
613 :
614 0 : int found_strict = 0;
615 0 : int found_lazy = 0;
616 0 : int found_assigned = 0;
617 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
618 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
619 0 : if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
620 0 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
621 0 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
622 0 : found_strict = 1;
623 0 : found_lazy = 1;
624 0 : found_assigned = 1;
625 0 : break;
626 0 : } else if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx>=FD_TILE_MAX ) ) {
627 0 : topo->workspaces[ i ].numa_idx = 0;
628 0 : found_lazy = 1;
629 0 : break;
630 0 : }
631 0 : }
632 :
633 0 : if( FD_LIKELY( !found_strict ) ) {
634 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
635 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
636 0 : for( ulong k=0UL; k<tile->uses_obj_cnt; k++ ) {
637 0 : if( FD_LIKELY( tile->uses_obj_id[ k ]==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
638 0 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
639 0 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
640 0 : found_lazy = 1;
641 0 : found_assigned = 1;
642 0 : break;
643 0 : } else if( FD_UNLIKELY( tile->uses_obj_id[ k ]==max_obj ) && tile->cpu_idx>=FD_TILE_MAX ) {
644 0 : topo->workspaces[ i ].numa_idx = 0;
645 0 : found_lazy = 1;
646 : /* Don't break, keep looking -- a tile with a CPU assignment
647 : might also use object in which case we want to use that
648 : NUMA node. */
649 0 : }
650 0 : }
651 :
652 0 : if( FD_UNLIKELY( found_assigned ) ) break;
653 0 : }
654 0 : }
655 :
656 0 : if( FD_UNLIKELY( !found_lazy ) ) FD_LOG_ERR(( "no tile uses object %s for workspace %s", topo->objs[ max_obj ].name, topo->workspaces[ i ].name ));
657 0 : }
658 0 : }
659 :
660 : void
661 : fd_topob_finish( fd_topo_t * topo,
662 0 : fd_topo_obj_callbacks_t ** callbacks ) {
663 0 : for( ulong z=0UL; z<topo->tile_cnt; z++ ) {
664 0 : fd_topo_tile_t * tile = &topo->tiles[ z ];
665 :
666 0 : ulong in_cnt = 0UL;
667 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
668 0 : if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
669 0 : in_cnt++;
670 0 : }
671 :
672 0 : FD_TEST( !fd_pod_replacef_ulong( topo->props, in_cnt, "obj.%lu.in_cnt", tile->metrics_obj_id ) );
673 0 : }
674 :
675 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
676 0 : fd_topo_wksp_t * wksp = &topo->workspaces[ i ];
677 :
678 0 : ulong loose_sz = 0UL;
679 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
680 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
681 0 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
682 :
683 0 : fd_topo_obj_callbacks_t * cb = NULL;
684 0 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
685 0 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
686 0 : cb = callbacks[ i ];
687 0 : break;
688 0 : }
689 0 : }
690 0 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
691 :
692 0 : if( FD_UNLIKELY( cb->loose ) ) loose_sz += cb->loose( topo, obj );
693 0 : }
694 :
695 0 : ulong part_max = wksp->part_max;
696 0 : if( !part_max ) part_max = (loose_sz / (64UL << 10)); /* alloc + residual padding */
697 0 : part_max += 3; /* for initial alignment */
698 0 : ulong offset = fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
699 :
700 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
701 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
702 0 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
703 :
704 0 : fd_topo_obj_callbacks_t * cb = NULL;
705 0 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
706 0 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
707 0 : cb = callbacks[ i ];
708 0 : break;
709 0 : }
710 0 : }
711 0 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
712 :
713 0 : ulong align_ = cb->align( topo, obj );
714 0 : if( FD_UNLIKELY( !fd_ulong_is_pow2( align_ ) ) ) FD_LOG_ERR(( "Return value of fdctl_obj_align(%s,%lu) is not a power of 2", obj->name, obj->id ));
715 0 : offset = fd_ulong_align_up( offset, align_ );
716 0 : obj->offset = offset;
717 0 : obj->footprint = cb->footprint( topo, obj );
718 0 : if( FD_UNLIKELY( 0!=strcmp( obj->name, "tile" ) && (!obj->footprint || obj->footprint>LONG_MAX) ) ) {
719 0 : FD_LOG_ERR(( "fdctl_obj_footprint(%s,%lu) failed", obj->name, obj->id ));
720 0 : }
721 0 : offset += obj->footprint;
722 0 : }
723 :
724 0 : ulong footprint = fd_ulong_align_up( offset, fd_topo_workspace_align() );
725 :
726 0 : part_max = fd_ulong_max( part_max, wksp->min_part_max );
727 0 : loose_sz = fd_ulong_max( loose_sz, wksp->min_loose_sz );
728 :
729 : /* Compute footprint for a workspace that can store our footprint,
730 : with an extra align of padding incase gaddr_lo is not aligned. */
731 0 : ulong total_wksp_footprint = fd_wksp_footprint( part_max, footprint + fd_topo_workspace_align() + loose_sz );
732 :
733 0 : ulong page_sz = topo->max_page_size;
734 0 : if( total_wksp_footprint < topo->gigantic_page_threshold ) page_sz = FD_SHMEM_HUGE_PAGE_SZ;
735 0 : if( FD_UNLIKELY( page_sz!=FD_SHMEM_HUGE_PAGE_SZ && page_sz!=FD_SHMEM_GIGANTIC_PAGE_SZ ) ) FD_LOG_ERR(( "invalid page_sz" ));
736 :
737 0 : ulong wksp_aligned_footprint = fd_ulong_align_up( total_wksp_footprint, page_sz );
738 :
739 : /* Give any leftover space in the underlying shared memory to the
740 : data region of the workspace, since we might as well use it. */
741 0 : wksp->part_max = part_max;
742 0 : wksp->known_footprint = footprint;
743 0 : wksp->total_footprint = wksp_aligned_footprint - fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
744 0 : wksp->page_sz = page_sz;
745 0 : wksp->page_cnt = wksp_aligned_footprint / page_sz;
746 0 : }
747 :
748 0 : initialize_numa_assignments( topo );
749 :
750 0 : validate( topo );
751 0 : }
|