Line data Source code
1 : #include "fd_topob.h"
2 :
3 : #include "../../util/pod/fd_pod_format.h"
4 : #include "fd_cpu_topo.h"
5 :
6 : fd_topo_t *
7 : fd_topob_new( void * mem,
8 9 : char const * app_name ) {
9 9 : fd_topo_t * topo = (fd_topo_t *)mem;
10 :
11 9 : if( FD_UNLIKELY( !topo ) ) {
12 0 : FD_LOG_WARNING( ( "NULL topo" ) );
13 0 : return NULL;
14 0 : }
15 :
16 9 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)topo, alignof(fd_topo_t) ) ) ) {
17 0 : FD_LOG_WARNING( ( "misaligned topo" ) );
18 0 : return NULL;
19 0 : }
20 :
21 9 : fd_memset( topo, 0, sizeof(fd_topo_t) );
22 :
23 9 : FD_TEST( fd_pod_new( topo->props, sizeof(topo->props) ) );
24 :
25 9 : if( FD_UNLIKELY( strlen( app_name )>=sizeof(topo->app_name) ) ) FD_LOG_ERR(( "app_name too long: %s", app_name ));
26 9 : strncpy( topo->app_name, app_name, sizeof(topo->app_name) );
27 :
28 9 : topo->max_page_size = FD_SHMEM_GIGANTIC_PAGE_SZ;
29 9 : topo->gigantic_page_threshold = 4 * FD_SHMEM_HUGE_PAGE_SZ;
30 :
31 9 : return topo;
32 9 : }
33 :
34 : fd_topo_wksp_t *
35 : fd_topob_wksp( fd_topo_t * topo,
36 363 : char const * name ) {
37 363 : if( FD_UNLIKELY( !topo || !name || !strlen( name ) ) ) FD_LOG_ERR(( "NULL args" ));
38 363 : if( FD_UNLIKELY( strlen( name )>=sizeof(topo->workspaces[ topo->wksp_cnt ].name ) ) ) FD_LOG_ERR(( "wksp name too long: %s", name ));
39 363 : if( FD_UNLIKELY( topo->wksp_cnt>=FD_TOPO_MAX_WKSPS ) ) FD_LOG_ERR(( "too many workspaces" ));
40 :
41 363 : fd_topo_wksp_t * wksp = &topo->workspaces[ topo->wksp_cnt ];
42 363 : strncpy( wksp->name, name, sizeof(wksp->name) );
43 363 : wksp->id = topo->wksp_cnt;
44 363 : wksp->is_locked = 1;
45 363 : topo->wksp_cnt++;
46 363 : return wksp;
47 363 : }
48 :
49 : fd_topo_obj_t *
50 : fd_topob_obj( fd_topo_t * topo,
51 : char const * obj_name,
52 1701 : char const * wksp_name ) {
53 1701 : if( FD_UNLIKELY( !topo || !obj_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
54 1701 : if( FD_UNLIKELY( strlen( obj_name )>=sizeof(topo->objs[ topo->obj_cnt ].name ) ) ) FD_LOG_ERR(( "obj name too long: %s", obj_name ));
55 1701 : if( FD_UNLIKELY( topo->obj_cnt>=FD_TOPO_MAX_OBJS ) ) FD_LOG_ERR(( "too many objects" ));
56 :
57 1701 : ulong wksp_id = fd_topo_find_wksp( topo, wksp_name );
58 1701 : if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "workspace not found: %s", wksp_name ));
59 :
60 1701 : fd_topo_obj_t * obj = &topo->objs[ topo->obj_cnt ];
61 1701 : strncpy( obj->name, obj_name, sizeof(obj->name) );
62 1701 : obj->id = topo->obj_cnt;
63 1701 : obj->wksp_id = wksp_id;
64 1701 : topo->obj_cnt++;
65 :
66 1701 : return obj;
67 1701 : }
68 :
69 : fd_topo_link_t *
70 : fd_topob_link( fd_topo_t * topo,
71 : char const * link_name,
72 : char const * wksp_name,
73 : ulong depth,
74 : ulong mtu,
75 339 : ulong burst ) {
76 339 : if( FD_UNLIKELY( !topo || !link_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
77 339 : if( FD_UNLIKELY( strlen( link_name )>=sizeof(topo->links[ topo->link_cnt ].name ) ) ) FD_LOG_ERR(( "link name too long: %s", link_name ));
78 339 : if( FD_UNLIKELY( topo->link_cnt>=FD_TOPO_MAX_LINKS ) ) FD_LOG_ERR(( "too many links" ));
79 :
80 339 : ulong kind_id = 0UL;
81 9369 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
82 9030 : if( !strcmp( topo->links[ i ].name, link_name ) ) kind_id++;
83 9030 : }
84 :
85 339 : fd_topo_link_t * link = &topo->links[ topo->link_cnt ];
86 339 : strncpy( link->name, link_name, sizeof(link->name) );
87 339 : link->id = topo->link_cnt;
88 339 : link->kind_id = kind_id;
89 339 : link->depth = depth;
90 339 : link->mtu = mtu;
91 339 : link->burst = burst;
92 :
93 339 : fd_topo_obj_t * obj = fd_topob_obj( topo, "mcache", wksp_name );
94 339 : link->mcache_obj_id = obj->id;
95 339 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
96 :
97 339 : if( mtu ) {
98 312 : obj = fd_topob_obj( topo, "dcache", wksp_name );
99 312 : link->dcache_obj_id = obj->id;
100 312 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
101 312 : FD_TEST( fd_pod_insertf_ulong( topo->props, burst, "obj.%lu.burst", obj->id ) );
102 312 : FD_TEST( fd_pod_insertf_ulong( topo->props, mtu, "obj.%lu.mtu", obj->id ) );
103 312 : }
104 339 : topo->link_cnt++;
105 :
106 339 : return link;
107 339 : }
108 :
109 : void
110 : fd_topob_tile_uses( fd_topo_t * topo,
111 : fd_topo_tile_t * tile,
112 : fd_topo_obj_t * obj,
113 2847 : int mode ) {
114 2847 : (void)topo;
115 :
116 2847 : if( FD_UNLIKELY( tile->uses_obj_cnt>=FD_TOPO_MAX_TILE_OBJS ) ) FD_LOG_ERR(( "tile `%s` uses too many objects", tile->name ));
117 :
118 2847 : tile->uses_obj_id[ tile->uses_obj_cnt ] = obj->id;
119 2847 : tile->uses_obj_mode[ tile->uses_obj_cnt ] = mode;
120 2847 : tile->uses_obj_cnt++;
121 2847 : }
122 :
123 : fd_topo_tile_t *
124 : fd_topob_tile( fd_topo_t * topo,
125 : char const * tile_name,
126 : char const * tile_wksp,
127 : char const * metrics_wksp,
128 : ulong cpu_idx,
129 : int is_agave,
130 189 : int uses_keyswitch ) {
131 189 : if( FD_UNLIKELY( !topo || !tile_name || !tile_wksp || !metrics_wksp ) ) FD_LOG_ERR(( "NULL args" ));
132 189 : if( FD_UNLIKELY( strlen( tile_name )>=sizeof(topo->tiles[ topo->tile_cnt ].name ) ) ) FD_LOG_ERR(( "tile name too long: %s", tile_name ));
133 189 : if( FD_UNLIKELY( topo->tile_cnt>=FD_TOPO_MAX_TILES ) ) FD_LOG_ERR(( "too many tiles %lu", topo->tile_cnt ));
134 :
135 189 : ulong kind_id = 0UL;
136 3015 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
137 2826 : if( !strcmp( topo->tiles[ i ].name, tile_name ) ) kind_id++;
138 2826 : }
139 :
140 189 : fd_topo_tile_t * tile = &topo->tiles[ topo->tile_cnt ];
141 189 : strncpy( tile->name, tile_name, sizeof(tile->name) );
142 189 : tile->metrics_name[ 0 ] = 0;
143 189 : tile->id = topo->tile_cnt;
144 189 : tile->kind_id = kind_id;
145 189 : tile->is_agave = is_agave;
146 189 : tile->cpu_idx = cpu_idx;
147 189 : tile->in_cnt = 0UL;
148 189 : tile->out_cnt = 0UL;
149 189 : tile->uses_obj_cnt = 0UL;
150 :
151 189 : fd_topo_obj_t * tile_obj = fd_topob_obj( topo, "tile", tile_wksp );
152 189 : tile->tile_obj_id = tile_obj->id;
153 189 : fd_topob_tile_uses( topo, tile, tile_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
154 :
155 189 : fd_topo_obj_t * obj = fd_topob_obj( topo, "metrics", metrics_wksp );
156 189 : tile->metrics_obj_id = obj->id;
157 189 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
158 :
159 189 : if( FD_LIKELY( uses_keyswitch ) ) {
160 33 : obj = fd_topob_obj( topo, "keyswitch", tile_wksp );
161 33 : tile->keyswitch_obj_id = obj->id;
162 33 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
163 156 : } else {
164 156 : tile->keyswitch_obj_id = ULONG_MAX;
165 156 : }
166 :
167 189 : topo->tile_cnt++;
168 189 : return tile;
169 189 : }
170 :
171 : void
172 : fd_topob_tile_in( fd_topo_t * topo,
173 : char const * tile_name,
174 : ulong tile_kind_id,
175 : char const * fseq_wksp,
176 : char const * link_name,
177 : ulong link_kind_id,
178 : int reliable,
179 501 : int polled ) {
180 501 : if( FD_UNLIKELY( !topo || !tile_name || !fseq_wksp || !link_name ) ) FD_LOG_ERR(( "NULL args" ));
181 :
182 501 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
183 501 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
184 501 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
185 :
186 501 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
187 501 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
188 501 : fd_topo_link_t * link = &topo->links[ link_id ];
189 :
190 501 : if( FD_UNLIKELY( tile->in_cnt>=FD_TOPO_MAX_TILE_IN_LINKS ) ) FD_LOG_ERR(( "too many in links: %s:%lu", tile_name, tile_kind_id ) );
191 501 : tile->in_link_id[ tile->in_cnt ] = link->id;
192 501 : tile->in_link_reliable[ tile->in_cnt ] = reliable;
193 501 : tile->in_link_poll[ tile->in_cnt ] = polled;
194 501 : fd_topo_obj_t * obj = fd_topob_obj( topo, "fseq", fseq_wksp );
195 501 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
196 501 : tile->in_link_fseq_obj_id[ tile->in_cnt ] = obj->id;
197 501 : tile->in_cnt++;
198 :
199 501 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
200 501 : if( FD_LIKELY( link->mtu ) ) {
201 471 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
202 471 : }
203 501 : }
204 :
205 : void
206 : fd_topob_tile_out( fd_topo_t * topo,
207 : char const * tile_name,
208 : ulong tile_kind_id,
209 : char const * link_name,
210 342 : ulong link_kind_id ) {
211 342 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
212 342 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
213 342 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
214 :
215 342 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
216 342 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
217 342 : fd_topo_link_t * link = &topo->links[ link_id ];
218 :
219 342 : if( FD_UNLIKELY( tile->out_cnt>=FD_TOPO_MAX_TILE_OUT_LINKS ) ) FD_LOG_ERR(( "too many out links: %s", tile_name ));
220 342 : tile->out_link_id[ tile->out_cnt ] = link->id;
221 342 : tile->out_cnt++;
222 :
223 342 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
224 342 : if( FD_LIKELY( link->mtu ) ) {
225 333 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
226 333 : }
227 342 : }
228 :
229 : static void
230 6 : validate( fd_topo_t const * topo ) {
231 : /* Objects have valid wksp_ids */
232 1626 : for( ulong i=0UL; i<topo->obj_cnt; i++ ) {
233 1620 : if( FD_UNLIKELY( topo->objs[ i ].wksp_id>=topo->wksp_cnt ) )
234 0 : FD_LOG_ERR(( "invalid workspace id %lu", topo->objs[ i ].wksp_id ));
235 1620 : }
236 :
237 : /* Tile ins are valid */
238 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
239 666 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
240 483 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ]>=topo->link_cnt ) )
241 0 : FD_LOG_ERR(( "tile %lu (%s) has invalid in link %lu", i, topo->tiles[ i ].name, topo->tiles[ i ].in_link_id[ j ] ));
242 483 : }
243 183 : }
244 :
245 : /* Tile does not have duplicated ins */
246 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
247 666 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
248 2850 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
249 2367 : if( FD_UNLIKELY( j==k ) ) continue;
250 1884 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
251 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated in link %lu (%s)", i, topo->tiles[ i ].name,
252 1884 : topo->tiles[ i ].in_link_id[ j ], topo->links[ topo->tiles[ i ].in_link_id[ j ] ].name ));
253 1884 : }
254 483 : }
255 183 : }
256 :
257 : /* Tile does not have duplicated outs */
258 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
259 519 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
260 2046 : for( ulong k=0UL; k<topo->tiles[ i ].out_cnt; k++ ) {
261 1710 : if( FD_UNLIKELY( j==k ) ) continue;
262 1374 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].out_link_id[ k ] ) )
263 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated out link %lu (%s)", i, topo->tiles[ i ].name,
264 1374 : topo->tiles[ i ].out_link_id[ j ], topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name ));
265 1374 : }
266 336 : }
267 183 : }
268 :
269 : /* Tile outs are different than ins */
270 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
271 519 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
272 1668 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
273 1332 : char const * link_name = topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name;
274 : /* PoH tile "publishes" this on behalf of Agave, so it's not
275 : a real circular link. */
276 1332 : if( FD_UNLIKELY( !strcmp( link_name, "stake_out" ) ||
277 1332 : !strcmp( link_name, "crds_shred" ) ) ) continue;
278 :
279 1296 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
280 0 : FD_LOG_ERR(( "tile %lu has out link %lu same as in", i, topo->tiles[ i ].out_link_id[ j ] ));
281 1296 : }
282 336 : }
283 183 : }
284 :
285 : /* Non polling tile ins are also not reliable */
286 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
287 666 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
288 483 : if( FD_UNLIKELY( !topo->tiles[ i ].in_link_poll[ j ] && topo->tiles[ i ].in_link_reliable[ j ] ) )
289 0 : FD_LOG_ERR(( "tile %lu has in link %lu which is not polled but reliable", i, topo->tiles[ i ].in_link_id[ j ] ));
290 483 : }
291 183 : }
292 :
293 : /* Tile outs are valid */
294 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
295 519 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
296 336 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] >= topo->link_cnt ) )
297 0 : FD_LOG_ERR(( "tile %lu has invalid out link %lu", i, topo->tiles[ i ].out_link_id[ j ] ));
298 336 : }
299 183 : }
300 :
301 : /* Workspace names are unique */
302 360 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
303 23406 : for( ulong j=0UL; j<topo->wksp_cnt; j++ ) {
304 23052 : if( FD_UNLIKELY( i==j ) ) continue;
305 22698 : if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, topo->workspaces[ j ].name ) ) )
306 0 : FD_LOG_ERR(( "duplicate workspace name %s", topo->workspaces[ i ].name ));
307 22698 : }
308 354 : }
309 :
310 : /* Each workspace is identified correctly */
311 360 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
312 354 : if( FD_UNLIKELY( topo->workspaces[ i ].id != i ) )
313 0 : FD_LOG_ERR(( "workspace %lu has id %lu", i, topo->workspaces[ i ].id ));
314 354 : }
315 :
316 : /* Each link has exactly one producer */
317 342 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
318 336 : ulong producer_cnt = 0;
319 11169 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
320 30999 : for( ulong k=0UL; k<topo->tiles[ j ].out_cnt; k++ ) {
321 20166 : if( topo->tiles[ j ].out_link_id[ k ]==i ) producer_cnt++;
322 20166 : }
323 10833 : }
324 336 : if( FD_UNLIKELY( producer_cnt>1UL || ( producer_cnt==0UL && !topo->links[ i ].permit_no_producers ) ) )
325 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has %lu producers", i, topo->links[ i ].name, topo->links[ i ].kind_id, producer_cnt ));
326 336 : }
327 :
328 : /* Each link has at least one consumer */
329 342 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
330 336 : ulong cnt = fd_topo_link_consumer_cnt( topo, &topo->links[ i ] );
331 336 : if( FD_UNLIKELY( cnt < 1UL && !topo->links[ i ].permit_no_consumers ) ) {
332 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has 0 consumers", i, topo->links[ i ].name, topo->links[ i ].kind_id ));
333 0 : }
334 336 : }
335 6 : }
336 :
337 : void
338 : fd_topob_auto_layout( fd_topo_t * topo,
339 6 : int reserve_agave_cores ) {
340 : /* Incredibly simple automatic layout system for now ... just assign
341 : tiles to CPU cores in NUMA sequential order, except for a few tiles
342 : which should be floating. */
343 :
344 6 : char const * FLOATING[] = {
345 6 : "netlnk",
346 6 : "metric",
347 6 : "cswtch",
348 6 : "bencho",
349 6 : "genesi", /* FIREDANCER ONLY */
350 6 : "ipecho", /* FIREDANCER ONLY */
351 6 : };
352 :
353 6 : char const * ORDERED[] = {
354 6 : "benchg",
355 6 : "benchs",
356 6 : "net",
357 6 : "sock",
358 6 : "quic",
359 6 : "bundle",
360 6 : "verify",
361 6 : "dedup",
362 6 : "resolv", /* FRANK only */
363 6 : "pack",
364 6 : "bank", /* FRANK only */
365 6 : "poh", /* FRANK only */
366 6 : "pohi", /* FIREDANCER only */
367 6 : "shred",
368 6 : "store", /* FRANK only */
369 6 : "storei", /* FIREDANCER only */
370 6 : "sign",
371 6 : "plugin",
372 6 : "gui",
373 6 : "gossvf", /* FIREDANCER only */
374 6 : "gossip", /* FIREDANCER only */
375 6 : "repair", /* FIREDANCER only */
376 6 : "replay", /* FIREDANCER only */
377 6 : "exec", /* FIREDANCER only */
378 6 : "writer", /* FIREDANCER only */
379 6 : "send", /* FIREDANCER only */
380 6 : "tower", /* FIREDANCER only */
381 6 : "rpcsrv", /* FIREDANCER only */
382 6 : "pktgen",
383 6 : "snaprd", /* FIREDANCER only */
384 6 : "snapdc", /* FIREDANCER only */
385 6 : "snapin", /* FIREDANCER only */
386 6 : "arch_f", /* FIREDANCER only */
387 6 : "arch_w", /* FIREDANCER only */
388 6 : };
389 :
390 6 : char const * CRITICAL_TILES[] = {
391 6 : "pack",
392 6 : "poh",
393 6 : "snapin", /* TODO: Snapshot loading speed depends on having full core */
394 6 : };
395 :
396 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
397 183 : fd_topo_tile_t * tile = &topo->tiles[ i ];
398 183 : tile->cpu_idx = ULONG_MAX;
399 183 : }
400 :
401 6 : fd_topo_cpus_t cpus[1];
402 6 : fd_topo_cpus_init( cpus );
403 :
404 6 : ulong cpu_ordering[ FD_TILE_MAX ] = { 0UL };
405 6 : int pairs_assigned[ FD_TILE_MAX ] = { 0 };
406 :
407 6 : ulong next_cpu_idx = 0UL;
408 12 : for( ulong i=0UL; i<cpus->numa_node_cnt; i++ ) {
409 390 : for( ulong j=0UL; j<cpus->cpu_cnt; j++ ) {
410 384 : fd_topo_cpu_t * cpu = &cpus->cpu[ j ];
411 :
412 384 : if( FD_UNLIKELY( pairs_assigned[ j ] || cpu->numa_node!=i ) ) continue;
413 :
414 192 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
415 192 : cpu_ordering[ next_cpu_idx++ ] = j;
416 :
417 192 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
418 : /* If the CPU has a HT pair, place it immediately after so they
419 : are sequentially assigned. */
420 192 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
421 192 : cpu_ordering[ next_cpu_idx++ ] = cpu->sibling;
422 192 : pairs_assigned[ cpu->sibling ] = 1;
423 192 : }
424 192 : }
425 6 : }
426 :
427 6 : FD_TEST( next_cpu_idx==cpus->cpu_cnt );
428 :
429 6 : int cpu_assigned[ FD_TILE_MAX ] = {0};
430 :
431 6 : ulong cpu_idx = 0UL;
432 210 : for( ulong i=0UL; i<sizeof(ORDERED)/sizeof(ORDERED[0]); i++ ) {
433 6426 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
434 6222 : fd_topo_tile_t * tile = &topo->tiles[ j ];
435 6222 : if( !strcmp( tile->name, ORDERED[ i ] ) ) {
436 162 : if( FD_UNLIKELY( cpu_idx>=cpus->cpu_cnt ) ) {
437 0 : FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned", tile->name, tile->kind_id ));
438 162 : } else {
439 : /* Certain tiles are latency and throughput critical and
440 : should not get a HT pair assigned. */
441 162 : fd_topo_cpu_t const * cpu = &cpus->cpu[ cpu_ordering[ cpu_idx ] ];
442 :
443 162 : int is_ht_critical = 0;
444 162 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
445 618 : for( ulong k=0UL; k<sizeof(CRITICAL_TILES)/sizeof(CRITICAL_TILES[0]); k++ ) {
446 468 : if( !strcmp( tile->name, CRITICAL_TILES[ k ] ) ) {
447 12 : is_ht_critical = 1;
448 12 : break;
449 12 : }
450 468 : }
451 162 : }
452 :
453 162 : if( FD_UNLIKELY( is_ht_critical ) ) {
454 12 : ulong try_assign = cpu_idx;
455 15 : while( cpu_assigned[ cpu_ordering[ try_assign ] ] || (cpus->cpu[ cpu_ordering[ try_assign ] ].sibling!=ULONG_MAX && cpu_assigned[ cpus->cpu[ cpu_ordering[ try_assign ] ].sibling ]) ) {
456 3 : try_assign++;
457 3 : if( FD_UNLIKELY( try_assign>=cpus->cpu_cnt ) ) FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned or have a HT pair assigned", tile->name, tile->kind_id ));
458 3 : }
459 :
460 12 : ulong sibling = cpus->cpu[ cpu_ordering[ try_assign ] ].sibling;
461 12 : cpu_assigned[ cpu_ordering[ try_assign ] ] = 1;
462 12 : if( sibling!=ULONG_MAX ) {
463 12 : cpu_assigned[ sibling ] = 1;
464 12 : }
465 12 : tile->cpu_idx = cpu_ordering[ try_assign ];
466 30 : while( cpu_assigned[ cpu_ordering[ cpu_idx ] ] ) cpu_idx++;
467 150 : } else {
468 150 : cpu_assigned[ cpu_ordering[ cpu_idx ] ] = 1;
469 150 : tile->cpu_idx = cpu_ordering[ cpu_idx ];
470 306 : while( cpu_assigned[ cpu_ordering[ cpu_idx ] ] ) cpu_idx++;
471 150 : }
472 162 : }
473 162 : }
474 6222 : }
475 204 : }
476 :
477 : /* Make sure all the tiles we haven't set are supposed to be floating. */
478 189 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
479 183 : fd_topo_tile_t * tile = &topo->tiles[ i ];
480 183 : if( tile->cpu_idx!=ULONG_MAX ) continue;
481 :
482 21 : int found = 0;
483 60 : for( ulong j=0UL; j<sizeof(FLOATING)/sizeof(FLOATING[0]); j++ ) {
484 60 : if( !strcmp( tile->name, FLOATING[ j ] ) ) {
485 21 : found = 1;
486 21 : break;
487 21 : }
488 60 : }
489 :
490 21 : if( FD_UNLIKELY( !found ) ) FD_LOG_WARNING(( "auto layout cannot affine tile `%s:%lu` because it is unknown. Leaving it floating", tile->name, tile->kind_id ));
491 21 : }
492 :
493 6 : if( FD_UNLIKELY( reserve_agave_cores ) ) {
494 126 : for( ulong i=cpu_idx; i<cpus->cpu_cnt; i++ ) {
495 123 : if( FD_UNLIKELY( !cpus->cpu[ cpu_ordering[ i ] ].online ) ) continue;
496 :
497 123 : if( FD_LIKELY( topo->agave_affinity_cnt<sizeof(topo->agave_affinity_cpu_idx)/sizeof(topo->agave_affinity_cpu_idx[0]) ) ) {
498 123 : topo->agave_affinity_cpu_idx[ topo->agave_affinity_cnt++ ] = cpu_ordering[ i ];
499 123 : }
500 123 : }
501 3 : }
502 6 : }
503 :
504 : ulong
505 : fd_numa_node_idx( ulong cpu_idx );
506 :
507 : static void
508 6 : initialize_numa_assignments( fd_topo_t * topo ) {
509 : /* Assign workspaces to NUMA nodes. The heuristic here is pretty
510 : simple for now: workspaces go on the NUMA node of the first
511 : tile which maps the largest object in the workspace. */
512 :
513 360 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
514 354 : ulong max_footprint = 0UL;
515 354 : ulong max_obj = ULONG_MAX;
516 :
517 103800 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
518 103446 : fd_topo_obj_t * obj = &topo->objs[ j ];
519 103446 : if( obj->wksp_id!=i ) continue;
520 :
521 1620 : if( FD_UNLIKELY( !max_footprint || obj->footprint>max_footprint ) ) {
522 648 : max_footprint = obj->footprint;
523 648 : max_obj = j;
524 648 : }
525 1620 : }
526 :
527 354 : if( FD_UNLIKELY( max_obj==ULONG_MAX ) ) FD_LOG_ERR(( "no object found for workspace %s", topo->workspaces[ i ].name ));
528 :
529 354 : int found_strict = 0;
530 354 : int found_lazy = 0;
531 10347 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
532 10083 : fd_topo_tile_t * tile = &topo->tiles[ j ];
533 10083 : if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
534 69 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
535 69 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
536 69 : found_strict = 1;
537 69 : found_lazy = 1;
538 69 : break;
539 10014 : } else if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx>=FD_TILE_MAX ) ) {
540 21 : topo->workspaces[ i ].numa_idx = 0;
541 21 : found_lazy = 1;
542 21 : break;
543 21 : }
544 10083 : }
545 :
546 354 : if( FD_LIKELY( !found_strict ) ) {
547 3303 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
548 3303 : fd_topo_tile_t * tile = &topo->tiles[ j ];
549 53814 : for( ulong k=0UL; k<tile->uses_obj_cnt; k++ ) {
550 50754 : if( FD_LIKELY( tile->uses_obj_id[ k ]==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
551 243 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
552 243 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
553 243 : found_lazy = 1;
554 243 : break;
555 50511 : } else if( FD_UNLIKELY( tile->uses_obj_id[ k ]==max_obj ) && tile->cpu_idx>=FD_TILE_MAX ) {
556 27 : topo->workspaces[ i ].numa_idx = 0;
557 27 : found_lazy = 1;
558 : /* Don't break, keep looking -- a tile with a CPU assignment
559 : might also use object in which case we want to use that
560 : NUMA node. */
561 27 : }
562 50754 : }
563 :
564 3303 : if( FD_UNLIKELY( found_lazy ) ) break;
565 3303 : }
566 285 : }
567 :
568 354 : if( FD_UNLIKELY( !found_lazy ) ) FD_LOG_ERR(( "no tile uses object %s for workspace %s", topo->objs[ max_obj ].name, topo->workspaces[ i ].name ));
569 354 : }
570 6 : }
571 :
572 : void
573 : fd_topob_finish( fd_topo_t * topo,
574 6 : fd_topo_obj_callbacks_t ** callbacks ) {
575 189 : for( ulong z=0UL; z<topo->tile_cnt; z++ ) {
576 183 : fd_topo_tile_t * tile = &topo->tiles[ z ];
577 :
578 183 : ulong in_cnt = 0UL;
579 666 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
580 483 : if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
581 468 : in_cnt++;
582 468 : }
583 :
584 183 : ulong cons_cnt = 0UL;
585 6018 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
586 5835 : fd_topo_tile_t * consumer_tile = &topo->tiles[ i ];
587 21405 : for( ulong j=0UL; j<consumer_tile->in_cnt; j++ ) {
588 44553 : for( ulong k=0UL; k<tile->out_cnt; k++ ) {
589 28983 : if( FD_UNLIKELY( consumer_tile->in_link_id[ j ]==tile->out_link_id[ k ] && consumer_tile->in_link_reliable[ j ] ) ) {
590 354 : cons_cnt++;
591 354 : }
592 28983 : }
593 15570 : }
594 5835 : }
595 :
596 183 : FD_TEST( !fd_pod_replacef_ulong( topo->props, in_cnt, "obj.%lu.in_cnt", tile->metrics_obj_id ) );
597 183 : FD_TEST( !fd_pod_replacef_ulong( topo->props, cons_cnt, "obj.%lu.cons_cnt", tile->metrics_obj_id ) );
598 183 : }
599 :
600 360 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
601 354 : fd_topo_wksp_t * wksp = &topo->workspaces[ i ];
602 :
603 354 : ulong loose_sz = 0UL;
604 103800 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
605 103446 : fd_topo_obj_t * obj = &topo->objs[ j ];
606 103446 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
607 :
608 1620 : fd_topo_obj_callbacks_t * cb = NULL;
609 6114 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
610 6114 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
611 1620 : cb = callbacks[ i ];
612 1620 : break;
613 1620 : }
614 6114 : }
615 1620 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
616 :
617 1620 : if( FD_UNLIKELY( cb->loose ) ) loose_sz += cb->loose( topo, obj );
618 1620 : }
619 :
620 354 : ulong part_max = wksp->part_max;
621 354 : if( !part_max ) part_max = (loose_sz / (64UL << 10)); /* alloc + residual padding */
622 354 : part_max += 3; /* for initial alignment */
623 354 : ulong offset = fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
624 :
625 103800 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
626 103446 : fd_topo_obj_t * obj = &topo->objs[ j ];
627 103446 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
628 :
629 1620 : fd_topo_obj_callbacks_t * cb = NULL;
630 6114 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
631 6114 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
632 1620 : cb = callbacks[ i ];
633 1620 : break;
634 1620 : }
635 6114 : }
636 1620 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
637 :
638 1620 : ulong align_ = cb->align( topo, obj );
639 1620 : if( FD_UNLIKELY( !fd_ulong_is_pow2( align_ ) ) ) FD_LOG_ERR(( "Return value of fdctl_obj_align(%s,%lu) is not a power of 2", obj->name, obj->id ));
640 1620 : offset = fd_ulong_align_up( offset, align_ );
641 1620 : obj->offset = offset;
642 1620 : obj->footprint = cb->footprint( topo, obj );
643 1620 : if( FD_UNLIKELY( 0!=strcmp( obj->name, "tile" ) && (!obj->footprint || obj->footprint>LONG_MAX) ) ) {
644 0 : FD_LOG_ERR(( "fdctl_obj_footprint(%s,%lu) failed", obj->name, obj->id ));
645 0 : }
646 1620 : offset += obj->footprint;
647 1620 : }
648 :
649 354 : ulong footprint = fd_ulong_align_up( offset, fd_topo_workspace_align() );
650 :
651 : /* Compute footprint for a workspace that can store our footprint,
652 : with an extra align of padding incase gaddr_lo is not aligned. */
653 354 : ulong total_wksp_footprint = fd_wksp_footprint( part_max, footprint + fd_topo_workspace_align() + loose_sz );
654 :
655 354 : ulong page_sz = topo->max_page_size;
656 354 : if( total_wksp_footprint < topo->gigantic_page_threshold ) page_sz = FD_SHMEM_HUGE_PAGE_SZ;
657 354 : if( FD_UNLIKELY( page_sz!=FD_SHMEM_HUGE_PAGE_SZ && page_sz!=FD_SHMEM_GIGANTIC_PAGE_SZ ) ) FD_LOG_ERR(( "invalid page_sz" ));
658 :
659 : /* If the workspace is not locked, we can't use huge pages. */
660 354 : if( FD_UNLIKELY( !wksp->is_locked ) ) {
661 0 : page_sz = FD_SHMEM_NORMAL_PAGE_SZ;
662 0 : }
663 :
664 354 : ulong wksp_aligned_footprint = fd_ulong_align_up( total_wksp_footprint, page_sz );
665 :
666 : /* Give any leftover space in the underlying shared memory to the
667 : data region of the workspace, since we might as well use it. */
668 354 : wksp->part_max = part_max;
669 354 : wksp->known_footprint = footprint;
670 354 : wksp->total_footprint = wksp_aligned_footprint - fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
671 354 : wksp->page_sz = page_sz;
672 354 : wksp->page_cnt = wksp_aligned_footprint / page_sz;
673 354 : }
674 :
675 6 : initialize_numa_assignments( topo );
676 :
677 6 : validate( topo );
678 6 : }
|