Line data Source code
1 : #include "fd_topob.h"
2 :
3 : #include "../../util/pod/fd_pod_format.h"
4 : #include "fd_cpu_topo.h"
5 :
6 : fd_topo_t *
7 : fd_topob_new( void * mem,
8 0 : char const * app_name ) {
9 0 : fd_topo_t * topo = (fd_topo_t *)mem;
10 :
11 0 : if( FD_UNLIKELY( !topo ) ) {
12 0 : FD_LOG_WARNING( ( "NULL topo" ) );
13 0 : return NULL;
14 0 : }
15 :
16 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)topo, alignof(fd_topo_t) ) ) ) {
17 0 : FD_LOG_WARNING( ( "misaligned topo" ) );
18 0 : return NULL;
19 0 : }
20 :
21 0 : fd_memset( topo, 0, sizeof(fd_topo_t) );
22 :
23 0 : FD_TEST( fd_pod_new( topo->props, sizeof(topo->props) ) );
24 :
25 0 : if( FD_UNLIKELY( strlen( app_name )>=sizeof(topo->app_name) ) ) FD_LOG_ERR(( "app_name too long: %s", app_name ));
26 0 : strncpy( topo->app_name, app_name, sizeof(topo->app_name) );
27 :
28 0 : topo->max_page_size = FD_SHMEM_GIGANTIC_PAGE_SZ;
29 0 : topo->gigantic_page_threshold = 4 * FD_SHMEM_HUGE_PAGE_SZ;
30 :
31 0 : return topo;
32 0 : }
33 :
34 : void
35 : fd_topob_wksp( fd_topo_t * topo,
36 3 : char const * name ) {
37 3 : if( FD_UNLIKELY( !topo || !name || !strlen( name ) ) ) FD_LOG_ERR(( "NULL args" ));
38 3 : if( FD_UNLIKELY( strlen( name )>=sizeof(topo->workspaces[ topo->wksp_cnt ].name ) ) ) FD_LOG_ERR(( "wksp name too long: %s", name ));
39 3 : if( FD_UNLIKELY( topo->wksp_cnt>=FD_TOPO_MAX_WKSPS ) ) FD_LOG_ERR(( "too many workspaces" ));
40 :
41 3 : fd_topo_wksp_t * wksp = &topo->workspaces[ topo->wksp_cnt ];
42 3 : strncpy( wksp->name, name, sizeof(wksp->name) );
43 3 : wksp->id = topo->wksp_cnt;
44 3 : wksp->is_locked = 1;
45 3 : topo->wksp_cnt++;
46 3 : }
47 :
48 : fd_topo_obj_t *
49 : fd_topob_obj( fd_topo_t * topo,
50 : char const * obj_name,
51 39 : char const * wksp_name ) {
52 39 : if( FD_UNLIKELY( !topo || !obj_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
53 39 : if( FD_UNLIKELY( strlen( obj_name )>=sizeof(topo->objs[ topo->obj_cnt ].name ) ) ) FD_LOG_ERR(( "obj name too long: %s", obj_name ));
54 39 : if( FD_UNLIKELY( topo->obj_cnt>=FD_TOPO_MAX_OBJS ) ) FD_LOG_ERR(( "too many objects" ));
55 :
56 39 : ulong wksp_id = fd_topo_find_wksp( topo, wksp_name );
57 39 : if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "workspace not found: %s", wksp_name ));
58 :
59 39 : fd_topo_obj_t * obj = &topo->objs[ topo->obj_cnt ];
60 39 : strncpy( obj->name, obj_name, sizeof(obj->name) );
61 39 : obj->id = topo->obj_cnt;
62 39 : obj->wksp_id = wksp_id;
63 39 : topo->obj_cnt++;
64 :
65 39 : return obj;
66 39 : }
67 :
68 : fd_topo_link_t *
69 : fd_topob_link( fd_topo_t * topo,
70 : char const * link_name,
71 : char const * wksp_name,
72 : ulong depth,
73 : ulong mtu,
74 6 : ulong burst ) {
75 6 : if( FD_UNLIKELY( !topo || !link_name || !wksp_name ) ) FD_LOG_ERR(( "NULL args" ));
76 6 : if( FD_UNLIKELY( strlen( link_name )>=sizeof(topo->links[ topo->link_cnt ].name ) ) ) FD_LOG_ERR(( "link name too long: %s", link_name ));
77 6 : if( FD_UNLIKELY( topo->link_cnt>=FD_TOPO_MAX_LINKS ) ) FD_LOG_ERR(( "too many links" ));
78 :
79 6 : ulong kind_id = 0UL;
80 9 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
81 3 : if( !strcmp( topo->links[ i ].name, link_name ) ) kind_id++;
82 3 : }
83 :
84 6 : fd_topo_link_t * link = &topo->links[ topo->link_cnt ];
85 6 : strncpy( link->name, link_name, sizeof(link->name) );
86 6 : link->id = topo->link_cnt;
87 6 : link->kind_id = kind_id;
88 6 : link->depth = depth;
89 6 : link->mtu = mtu;
90 6 : link->burst = burst;
91 :
92 6 : fd_topo_obj_t * obj = fd_topob_obj( topo, "mcache", wksp_name );
93 6 : link->mcache_obj_id = obj->id;
94 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
95 :
96 6 : if( mtu ) {
97 6 : obj = fd_topob_obj( topo, "dcache", wksp_name );
98 6 : link->dcache_obj_id = obj->id;
99 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, depth, "obj.%lu.depth", obj->id ) );
100 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, burst, "obj.%lu.burst", obj->id ) );
101 6 : FD_TEST( fd_pod_insertf_ulong( topo->props, mtu, "obj.%lu.mtu", obj->id ) );
102 6 : }
103 6 : topo->link_cnt++;
104 :
105 6 : return link;
106 6 : }
107 :
108 : void
109 : fd_topob_tile_uses( fd_topo_t * topo,
110 : fd_topo_tile_t * tile,
111 : fd_topo_obj_t * obj,
112 21 : int mode ) {
113 21 : (void)topo;
114 :
115 21 : if( FD_UNLIKELY( tile->uses_obj_cnt>=FD_TOPO_MAX_TILE_OBJS ) ) FD_LOG_ERR(( "tile `%s` uses too many objects", tile->name ));
116 :
117 21 : tile->uses_obj_id[ tile->uses_obj_cnt ] = obj->id;
118 21 : tile->uses_obj_mode[ tile->uses_obj_cnt ] = mode;
119 21 : tile->uses_obj_cnt++;
120 21 : }
121 :
122 : fd_topo_tile_t *
123 : fd_topob_tile( fd_topo_t * topo,
124 : char const * tile_name,
125 : char const * tile_wksp,
126 : char const * metrics_wksp,
127 : ulong cpu_idx,
128 : int is_agave,
129 3 : int uses_keyswitch ) {
130 3 : if( FD_UNLIKELY( !topo || !tile_name || !tile_wksp || !metrics_wksp ) ) FD_LOG_ERR(( "NULL args" ));
131 3 : if( FD_UNLIKELY( strlen( tile_name )>=sizeof(topo->tiles[ topo->tile_cnt ].name ) ) ) FD_LOG_ERR(( "tile name too long: %s", tile_name ));
132 3 : if( FD_UNLIKELY( topo->tile_cnt>=FD_TOPO_MAX_TILES ) ) FD_LOG_ERR(( "too many tiles %lu", topo->tile_cnt ));
133 :
134 3 : ulong kind_id = 0UL;
135 3 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
136 0 : if( !strcmp( topo->tiles[ i ].name, tile_name ) ) kind_id++;
137 0 : }
138 :
139 3 : fd_topo_tile_t * tile = &topo->tiles[ topo->tile_cnt ];
140 3 : strncpy( tile->name, tile_name, sizeof(tile->name) );
141 3 : tile->id = topo->tile_cnt;
142 3 : tile->kind_id = kind_id;
143 3 : tile->is_agave = is_agave;
144 3 : tile->cpu_idx = cpu_idx;
145 3 : tile->in_cnt = 0UL;
146 3 : tile->out_cnt = 0UL;
147 3 : tile->uses_obj_cnt = 0UL;
148 :
149 3 : fd_topo_obj_t * tile_obj = fd_topob_obj( topo, "tile", tile_wksp );
150 3 : tile->tile_obj_id = tile_obj->id;
151 3 : fd_topob_tile_uses( topo, tile, tile_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
152 :
153 3 : fd_topo_obj_t * obj = fd_topob_obj( topo, "metrics", metrics_wksp );
154 3 : tile->metrics_obj_id = obj->id;
155 3 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
156 :
157 3 : if( FD_LIKELY( uses_keyswitch ) ) {
158 0 : obj = fd_topob_obj( topo, "keyswitch", tile_wksp );
159 0 : tile->keyswitch_obj_id = obj->id;
160 0 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
161 3 : } else {
162 3 : tile->keyswitch_obj_id = ULONG_MAX;
163 3 : }
164 :
165 3 : topo->tile_cnt++;
166 3 : return tile;
167 3 : }
168 :
169 : void
170 : fd_topob_tile_in( fd_topo_t * topo,
171 : char const * tile_name,
172 : ulong tile_kind_id,
173 : char const * fseq_wksp,
174 : char const * link_name,
175 : ulong link_kind_id,
176 : int reliable,
177 3 : int polled ) {
178 3 : if( FD_UNLIKELY( !topo || !tile_name || !fseq_wksp || !link_name ) ) FD_LOG_ERR(( "NULL args" ));
179 :
180 3 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
181 3 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
182 3 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
183 :
184 3 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
185 3 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
186 3 : fd_topo_link_t * link = &topo->links[ link_id ];
187 :
188 3 : if( FD_UNLIKELY( tile->in_cnt>=FD_TOPO_MAX_TILE_IN_LINKS ) ) FD_LOG_ERR(( "too many in links: %s:%lu", tile_name, tile_kind_id ) );
189 3 : tile->in_link_id[ tile->in_cnt ] = link->id;
190 3 : tile->in_link_reliable[ tile->in_cnt ] = reliable;
191 3 : tile->in_link_poll[ tile->in_cnt ] = polled;
192 3 : fd_topo_obj_t * obj = fd_topob_obj( topo, "fseq", fseq_wksp );
193 3 : fd_topob_tile_uses( topo, tile, obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
194 3 : tile->in_link_fseq_obj_id[ tile->in_cnt ] = obj->id;
195 3 : tile->in_cnt++;
196 :
197 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
198 3 : if( FD_LIKELY( link->mtu ) ) {
199 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_ONLY );
200 3 : }
201 3 : }
202 :
203 : void
204 : fd_topob_tile_out( fd_topo_t * topo,
205 : char const * tile_name,
206 : ulong tile_kind_id,
207 : char const * link_name,
208 3 : ulong link_kind_id ) {
209 3 : ulong tile_id = fd_topo_find_tile( topo, tile_name, tile_kind_id );
210 3 : if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "tile not found: %s:%lu", tile_name, tile_kind_id ));
211 3 : fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
212 :
213 3 : ulong link_id = fd_topo_find_link( topo, link_name, link_kind_id );
214 3 : if( FD_UNLIKELY( link_id==ULONG_MAX ) ) FD_LOG_ERR(( "link not found: %s:%lu", link_name, link_kind_id ));
215 3 : fd_topo_link_t * link = &topo->links[ link_id ];
216 :
217 3 : if( FD_UNLIKELY( tile->out_cnt>=FD_TOPO_MAX_TILE_OUT_LINKS ) ) FD_LOG_ERR(( "too many out links: %s", tile_name ));
218 3 : tile->out_link_id[ tile->out_cnt ] = link->id;
219 3 : tile->out_cnt++;
220 :
221 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->mcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
222 3 : if( FD_LIKELY( link->mtu ) ) {
223 3 : fd_topob_tile_uses( topo, tile, &topo->objs[ link->dcache_obj_id ], FD_SHMEM_JOIN_MODE_READ_WRITE );
224 3 : }
225 3 : }
226 :
227 : static void
228 0 : validate( fd_topo_t const * topo ) {
229 : /* Objects have valid wksp_ids */
230 0 : for( ulong i=0UL; i<topo->obj_cnt; i++ ) {
231 0 : if( FD_UNLIKELY( topo->objs[ i ].wksp_id>=topo->wksp_cnt ) )
232 0 : FD_LOG_ERR(( "invalid workspace id %lu", topo->objs[ i ].wksp_id ));
233 0 : }
234 :
235 : /* Tile ins are valid */
236 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
237 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
238 0 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ]>=topo->link_cnt ) )
239 0 : FD_LOG_ERR(( "tile %lu (%s) has invalid in link %lu", i, topo->tiles[ i ].name, topo->tiles[ i ].in_link_id[ j ] ));
240 0 : }
241 0 : }
242 :
243 : /* Tile does not have duplicated ins */
244 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
245 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
246 0 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
247 0 : if( FD_UNLIKELY( j==k ) ) continue;
248 0 : if( FD_UNLIKELY( topo->tiles[ i ].in_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
249 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated in link %lu (%s)", i, topo->tiles[ i ].name,
250 0 : topo->tiles[ i ].in_link_id[ j ], topo->links[ topo->tiles[ i ].in_link_id[ j ] ].name ));
251 0 : }
252 0 : }
253 0 : }
254 :
255 : /* Tile does not have duplicated outs */
256 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
257 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
258 0 : for( ulong k=0UL; k<topo->tiles[ i ].out_cnt; k++ ) {
259 0 : if( FD_UNLIKELY( j==k ) ) continue;
260 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].out_link_id[ k ] ) )
261 0 : FD_LOG_ERR(( "tile %lu (%s) has duplicated out link %lu (%s)", i, topo->tiles[ i ].name,
262 0 : topo->tiles[ i ].out_link_id[ j ], topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name ));
263 0 : }
264 0 : }
265 0 : }
266 :
267 : /* Tile outs are different than ins */
268 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
269 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
270 0 : for( ulong k=0UL; k<topo->tiles[ i ].in_cnt; k++ ) {
271 0 : char const * link_name = topo->links[ topo->tiles[ i ].out_link_id[ j ] ].name;
272 : /* PoH tile "publishes" this on behalf of Agave, so it's not
273 : a real circular link. */
274 0 : if( FD_UNLIKELY( !strcmp( link_name, "stake_out" ) ||
275 0 : !strcmp( link_name, "crds_shred" ) ) ) continue;
276 :
277 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] == topo->tiles[ i ].in_link_id[ k ] ) )
278 0 : FD_LOG_ERR(( "tile %lu has out link %lu same as in", i, topo->tiles[ i ].out_link_id[ j ] ));
279 0 : }
280 0 : }
281 0 : }
282 :
283 : /* Non polling tile ins are also not reliable */
284 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
285 0 : for( ulong j=0UL; j<topo->tiles[ i ].in_cnt; j++ ) {
286 0 : if( FD_UNLIKELY( !topo->tiles[ i ].in_link_poll[ j ] && topo->tiles[ i ].in_link_reliable[ j ] ) )
287 0 : FD_LOG_ERR(( "tile %lu has in link %lu which is not polled but reliable", i, topo->tiles[ i ].in_link_id[ j ] ));
288 0 : }
289 0 : }
290 :
291 : /* Tile outs are valid */
292 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
293 0 : for( ulong j=0UL; j<topo->tiles[ i ].out_cnt; j++ ) {
294 0 : if( FD_UNLIKELY( topo->tiles[ i ].out_link_id[ j ] >= topo->link_cnt ) )
295 0 : FD_LOG_ERR(( "tile %lu has invalid out link %lu", i, topo->tiles[ i ].out_link_id[ j ] ));
296 0 : }
297 0 : }
298 :
299 : /* Workspace names are unique */
300 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
301 0 : for( ulong j=0UL; j<topo->wksp_cnt; j++ ) {
302 0 : if( FD_UNLIKELY( i==j ) ) continue;
303 0 : if( FD_UNLIKELY( !strcmp( topo->workspaces[ i ].name, topo->workspaces[ j ].name ) ) )
304 0 : FD_LOG_ERR(( "duplicate workspace name %s", topo->workspaces[ i ].name ));
305 0 : }
306 0 : }
307 :
308 : /* Each workspace is identified correctly */
309 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
310 0 : if( FD_UNLIKELY( topo->workspaces[ i ].id != i ) )
311 0 : FD_LOG_ERR(( "workspace %lu has id %lu", i, topo->workspaces[ i ].id ));
312 0 : }
313 :
314 : /* Each link has exactly one producer */
315 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
316 0 : ulong producer_cnt = 0;
317 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
318 0 : for( ulong k=0UL; k<topo->tiles[ j ].out_cnt; k++ ) {
319 0 : if( topo->tiles[ j ].out_link_id[ k ]==i ) producer_cnt++;
320 0 : }
321 0 : }
322 0 : if( FD_UNLIKELY( producer_cnt>1UL || ( producer_cnt==0UL && !topo->links[ i ].permit_no_producers ) ) )
323 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has %lu producers", i, topo->links[ i ].name, topo->links[ i ].kind_id, producer_cnt ));
324 0 : }
325 :
326 : /* Each link has at least one consumer */
327 0 : for( ulong i=0UL; i<topo->link_cnt; i++ ) {
328 0 : ulong cnt = fd_topo_link_consumer_cnt( topo, &topo->links[ i ] );
329 0 : if( FD_UNLIKELY( cnt < 1UL && !topo->links[ i ].permit_no_consumers ) ) {
330 0 : FD_LOG_ERR(( "link %lu (%s:%lu) has 0 consumers", i, topo->links[ i ].name, topo->links[ i ].kind_id ));
331 0 : }
332 0 : }
333 0 : }
334 :
335 : void
336 : fd_topob_auto_layout( fd_topo_t * topo,
337 0 : int reserve_agave_cores ) {
338 : /* Incredibly simple automatic layout system for now ... just assign
339 : tiles to CPU cores in NUMA sequential order, except for a few tiles
340 : which should be floating. */
341 :
342 0 : char const * FLOATING[] = {
343 0 : "netlnk",
344 0 : "metric",
345 0 : "cswtch",
346 0 : "bencho",
347 0 : };
348 :
349 0 : char const * ORDERED[] = {
350 0 : "benchg",
351 0 : "benchs",
352 0 : "net",
353 0 : "sock",
354 0 : "quic",
355 0 : "bundle",
356 0 : "verify",
357 0 : "dedup",
358 0 : "resolv", /* FRANK only */
359 0 : "pack",
360 0 : "bank", /* FRANK only */
361 0 : "poh", /* FRANK only */
362 0 : "pohi", /* FIREDANCER only */
363 0 : "shred",
364 0 : "store", /* FRANK only */
365 0 : "storei", /* FIREDANCER only */
366 0 : "sign",
367 0 : "plugin",
368 0 : "gui",
369 0 : "gossip", /* FIREDANCER only */
370 0 : "repair", /* FIREDANCER only */
371 0 : "replay", /* FIREDANCER only */
372 0 : "exec", /* FIREDANCER only */
373 0 : "writer", /* FIREDANCER only */
374 0 : "send", /* FIREDANCER only */
375 0 : "tower", /* FIREDANCER only */
376 0 : "rpcsrv", /* FIREDANCER only */
377 0 : "pktgen",
378 0 : "snaprd", /* FIREDANCER only */
379 0 : "snapdc", /* FIREDANCER only */
380 0 : "snapin", /* FIREDANCER only */
381 0 : "arch_f", /* FIREDANCER only */
382 0 : "arch_w", /* FIREDANCER only */
383 0 : };
384 :
385 0 : char const * CRITICAL_TILES[] = {
386 0 : "pack",
387 0 : "poh",
388 0 : };
389 :
390 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
391 0 : fd_topo_tile_t * tile = &topo->tiles[ i ];
392 0 : tile->cpu_idx = ULONG_MAX;
393 0 : }
394 :
395 0 : fd_topo_cpus_t cpus[1];
396 0 : fd_topo_cpus_init( cpus );
397 :
398 0 : ulong cpu_ordering[ FD_TILE_MAX ] = { 0UL };
399 0 : int pairs_assigned[ FD_TILE_MAX ] = { 0 };
400 :
401 0 : ulong next_cpu_idx = 0UL;
402 0 : for( ulong i=0UL; i<cpus->numa_node_cnt; i++ ) {
403 0 : for( ulong j=0UL; j<cpus->cpu_cnt; j++ ) {
404 0 : fd_topo_cpu_t * cpu = &cpus->cpu[ j ];
405 :
406 0 : if( FD_UNLIKELY( pairs_assigned[ j ] || cpu->numa_node!=i ) ) continue;
407 :
408 0 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
409 0 : cpu_ordering[ next_cpu_idx++ ] = j;
410 :
411 0 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
412 : /* If the CPU has a HT pair, place it immediately after so they
413 : are sequentially assigned. */
414 0 : FD_TEST( next_cpu_idx<FD_TILE_MAX );
415 0 : cpu_ordering[ next_cpu_idx++ ] = cpu->sibling;
416 0 : pairs_assigned[ cpu->sibling ] = 1;
417 0 : }
418 0 : }
419 0 : }
420 :
421 0 : FD_TEST( next_cpu_idx==cpus->cpu_cnt );
422 :
423 0 : int cpu_assigned[ FD_TILE_MAX ] = {0};
424 :
425 0 : ulong cpu_idx = 0UL;
426 0 : for( ulong i=0UL; i<sizeof(ORDERED)/sizeof(ORDERED[0]); i++ ) {
427 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
428 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
429 0 : if( !strcmp( tile->name, ORDERED[ i ] ) ) {
430 0 : if( FD_UNLIKELY( cpu_idx>=cpus->cpu_cnt ) ) {
431 0 : FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned", tile->name, tile->kind_id ));
432 0 : } else {
433 : /* Certain tiles are latency and throughput critical and
434 : should not get a HT pair assigned. */
435 0 : fd_topo_cpu_t const * cpu = &cpus->cpu[ cpu_ordering[ cpu_idx ] ];
436 :
437 0 : int is_ht_critical = 0;
438 0 : if( FD_UNLIKELY( cpu->sibling!=ULONG_MAX ) ) {
439 0 : for( ulong k=0UL; k<sizeof(CRITICAL_TILES)/sizeof(CRITICAL_TILES[0]); k++ ) {
440 0 : if( !strcmp( tile->name, CRITICAL_TILES[ k ] ) ) {
441 0 : is_ht_critical = 1;
442 0 : break;
443 0 : }
444 0 : }
445 0 : }
446 :
447 0 : if( FD_UNLIKELY( is_ht_critical ) ) {
448 0 : ulong try_assign = cpu_idx;
449 0 : while( cpu_assigned[ cpu_ordering[ try_assign ] ] || (cpus->cpu[ cpu_ordering[ try_assign ] ].sibling!=ULONG_MAX && cpu_assigned[ cpus->cpu[ cpu_ordering[ try_assign ] ].sibling ]) ) {
450 0 : try_assign++;
451 0 : if( FD_UNLIKELY( try_assign>=cpus->cpu_cnt ) ) FD_LOG_ERR(( "auto layout cannot set affinity for tile `%s:%lu` because all the CPUs are already assigned or have a HT pair assigned", tile->name, tile->kind_id ));
452 0 : }
453 :
454 0 : ulong sibling = cpus->cpu[ cpu_ordering[ try_assign ] ].sibling;
455 0 : cpu_assigned[ cpu_ordering[ try_assign ] ] = 1;
456 0 : if( sibling!=ULONG_MAX ) {
457 0 : cpu_assigned[ sibling ] = 1;
458 0 : }
459 0 : tile->cpu_idx = cpu_ordering[ try_assign ];
460 0 : while( cpu_assigned[ cpu_ordering[ cpu_idx ] ] ) cpu_idx++;
461 0 : } else {
462 0 : cpu_assigned[ cpu_ordering[ cpu_idx ] ] = 1;
463 0 : tile->cpu_idx = cpu_ordering[ cpu_idx ];
464 0 : while( cpu_assigned[ cpu_ordering[ cpu_idx ] ] ) cpu_idx++;
465 0 : }
466 0 : }
467 0 : }
468 0 : }
469 0 : }
470 :
471 : /* Make sure all the tiles we haven't set are supposed to be floating. */
472 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
473 0 : fd_topo_tile_t * tile = &topo->tiles[ i ];
474 0 : if( tile->cpu_idx!=ULONG_MAX ) continue;
475 :
476 0 : int found = 0;
477 0 : for( ulong j=0UL; j<sizeof(FLOATING)/sizeof(FLOATING[0]); j++ ) {
478 0 : if( !strcmp( tile->name, FLOATING[ j ] ) ) {
479 0 : found = 1;
480 0 : break;
481 0 : }
482 0 : }
483 :
484 0 : if( FD_UNLIKELY( !found ) ) FD_LOG_WARNING(( "auto layout cannot affine tile `%s:%lu` because it is unknown. Leaving it floating", tile->name, tile->kind_id ));
485 0 : }
486 :
487 0 : if( FD_UNLIKELY( reserve_agave_cores ) ) {
488 0 : for( ulong i=cpu_idx; i<cpus->cpu_cnt; i++ ) {
489 0 : if( FD_UNLIKELY( !cpus->cpu[ cpu_ordering[ i ] ].online ) ) continue;
490 :
491 0 : if( FD_LIKELY( topo->agave_affinity_cnt<sizeof(topo->agave_affinity_cpu_idx)/sizeof(topo->agave_affinity_cpu_idx[0]) ) ) {
492 0 : topo->agave_affinity_cpu_idx[ topo->agave_affinity_cnt++ ] = cpu_ordering[ i ];
493 0 : }
494 0 : }
495 0 : }
496 0 : }
497 :
498 : ulong
499 : fd_numa_node_idx( ulong cpu_idx );
500 :
501 : static void
502 0 : initialize_numa_assignments( fd_topo_t * topo ) {
503 : /* Assign workspaces to NUMA nodes. The heuristic here is pretty
504 : simple for now: workspaces go on the NUMA node of the first
505 : tile which maps the largest object in the workspace. */
506 :
507 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
508 0 : ulong max_footprint = 0UL;
509 0 : ulong max_obj = ULONG_MAX;
510 :
511 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
512 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
513 0 : if( obj->wksp_id!=i ) continue;
514 :
515 0 : if( FD_UNLIKELY( !max_footprint || obj->footprint>max_footprint ) ) {
516 0 : max_footprint = obj->footprint;
517 0 : max_obj = j;
518 0 : }
519 0 : }
520 :
521 0 : if( FD_UNLIKELY( max_obj==ULONG_MAX ) ) FD_LOG_ERR(( "no object found for workspace %s", topo->workspaces[ i ].name ));
522 :
523 0 : int found_strict = 0;
524 0 : int found_lazy = 0;
525 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
526 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
527 0 : if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
528 0 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
529 0 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
530 0 : found_strict = 1;
531 0 : found_lazy = 1;
532 0 : break;
533 0 : } else if( FD_UNLIKELY( tile->tile_obj_id==max_obj && tile->cpu_idx>=FD_TILE_MAX ) ) {
534 0 : topo->workspaces[ i ].numa_idx = 0;
535 0 : found_lazy = 1;
536 0 : break;
537 0 : }
538 0 : }
539 :
540 0 : if( FD_LIKELY( !found_strict ) ) {
541 0 : for( ulong j=0UL; j<topo->tile_cnt; j++ ) {
542 0 : fd_topo_tile_t * tile = &topo->tiles[ j ];
543 0 : for( ulong k=0UL; k<tile->uses_obj_cnt; k++ ) {
544 0 : if( FD_LIKELY( tile->uses_obj_id[ k ]==max_obj && tile->cpu_idx<FD_TILE_MAX ) ) {
545 0 : topo->workspaces[ i ].numa_idx = fd_numa_node_idx( tile->cpu_idx );
546 0 : FD_TEST( topo->workspaces[ i ].numa_idx!=ULONG_MAX );
547 0 : found_lazy = 1;
548 0 : break;
549 0 : } else if( FD_UNLIKELY( tile->uses_obj_id[ k ]==max_obj ) && tile->cpu_idx>=FD_TILE_MAX ) {
550 0 : topo->workspaces[ i ].numa_idx = 0;
551 0 : found_lazy = 1;
552 : /* Don't break, keep looking -- a tile with a CPU assignment
553 : might also use object in which case we want to use that
554 : NUMA node. */
555 0 : }
556 0 : }
557 :
558 0 : if( FD_UNLIKELY( found_lazy ) ) break;
559 0 : }
560 0 : }
561 :
562 0 : if( FD_UNLIKELY( !found_lazy ) ) FD_LOG_ERR(( "no tile uses object %s for workspace %s", topo->objs[ max_obj ].name, topo->workspaces[ i ].name ));
563 0 : }
564 0 : }
565 :
566 : void
567 : fd_topob_finish( fd_topo_t * topo,
568 0 : fd_topo_obj_callbacks_t ** callbacks ) {
569 0 : for( ulong z=0UL; z<topo->tile_cnt; z++ ) {
570 0 : fd_topo_tile_t * tile = &topo->tiles[ z ];
571 :
572 0 : ulong in_cnt = 0UL;
573 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
574 0 : if( FD_UNLIKELY( !tile->in_link_poll[ i ] ) ) continue;
575 0 : in_cnt++;
576 0 : }
577 :
578 0 : ulong cons_cnt = 0UL;
579 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
580 0 : fd_topo_tile_t * consumer_tile = &topo->tiles[ i ];
581 0 : for( ulong j=0UL; j<consumer_tile->in_cnt; j++ ) {
582 0 : for( ulong k=0UL; k<tile->out_cnt; k++ ) {
583 0 : if( FD_UNLIKELY( consumer_tile->in_link_id[ j ]==tile->out_link_id[ k ] && consumer_tile->in_link_reliable[ j ] ) ) {
584 0 : cons_cnt++;
585 0 : }
586 0 : }
587 0 : }
588 0 : }
589 :
590 0 : FD_TEST( !fd_pod_replacef_ulong( topo->props, in_cnt, "obj.%lu.in_cnt", tile->metrics_obj_id ) );
591 0 : FD_TEST( !fd_pod_replacef_ulong( topo->props, cons_cnt, "obj.%lu.cons_cnt", tile->metrics_obj_id ) );
592 0 : }
593 :
594 0 : for( ulong i=0UL; i<topo->wksp_cnt; i++ ) {
595 0 : fd_topo_wksp_t * wksp = &topo->workspaces[ i ];
596 :
597 0 : ulong loose_sz = 0UL;
598 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
599 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
600 0 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
601 :
602 0 : fd_topo_obj_callbacks_t * cb = NULL;
603 0 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
604 0 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
605 0 : cb = callbacks[ i ];
606 0 : break;
607 0 : }
608 0 : }
609 0 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
610 :
611 0 : if( FD_UNLIKELY( cb->loose ) ) loose_sz += cb->loose( topo, obj );
612 0 : }
613 :
614 0 : ulong part_max = wksp->part_max;
615 0 : if( !part_max ) part_max = (loose_sz / (64UL << 10)); /* alloc + residual padding */
616 0 : part_max += 3; /* for initial alignment */
617 0 : ulong offset = fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
618 :
619 0 : for( ulong j=0UL; j<topo->obj_cnt; j++ ) {
620 0 : fd_topo_obj_t * obj = &topo->objs[ j ];
621 0 : if( FD_UNLIKELY( obj->wksp_id!=wksp->id ) ) continue;
622 :
623 0 : fd_topo_obj_callbacks_t * cb = NULL;
624 0 : for( ulong i=0UL; callbacks[ i ]; i++ ) {
625 0 : if( FD_UNLIKELY( !strcmp( callbacks[ i ]->name, obj->name ) ) ) {
626 0 : cb = callbacks[ i ];
627 0 : break;
628 0 : }
629 0 : }
630 0 : if( FD_UNLIKELY( !cb ) ) FD_LOG_ERR(( "no callbacks for object %s", obj->name ));
631 :
632 0 : ulong align_ = cb->align( topo, obj );
633 0 : if( FD_UNLIKELY( !fd_ulong_is_pow2( align_ ) ) ) FD_LOG_ERR(( "Return value of fdctl_obj_align(%s,%lu) is not a power of 2", obj->name, obj->id ));
634 0 : offset = fd_ulong_align_up( offset, align_ );
635 0 : obj->offset = offset;
636 0 : obj->footprint = cb->footprint( topo, obj );
637 0 : if( FD_UNLIKELY( 0!=strcmp( obj->name, "tile" ) && (!obj->footprint || obj->footprint>LONG_MAX) ) ) {
638 0 : FD_LOG_ERR(( "fdctl_obj_footprint(%s,%lu) failed", obj->name, obj->id ));
639 0 : }
640 0 : offset += obj->footprint;
641 0 : }
642 :
643 0 : ulong footprint = fd_ulong_align_up( offset, fd_topo_workspace_align() );
644 :
645 : /* Compute footprint for a workspace that can store our footprint,
646 : with an extra align of padding incase gaddr_lo is not aligned. */
647 0 : ulong total_wksp_footprint = fd_wksp_footprint( part_max, footprint + fd_topo_workspace_align() + loose_sz );
648 :
649 0 : ulong page_sz = topo->max_page_size;
650 0 : if( total_wksp_footprint < topo->gigantic_page_threshold ) page_sz = FD_SHMEM_HUGE_PAGE_SZ;
651 0 : if( FD_UNLIKELY( page_sz!=FD_SHMEM_HUGE_PAGE_SZ && page_sz!=FD_SHMEM_GIGANTIC_PAGE_SZ ) ) FD_LOG_ERR(( "invalid page_sz" ));
652 :
653 : /* If the workspace is not locked, we can't use huge pages. */
654 0 : if( FD_UNLIKELY( !wksp->is_locked ) ) {
655 0 : page_sz = FD_SHMEM_NORMAL_PAGE_SZ;
656 0 : }
657 :
658 0 : ulong wksp_aligned_footprint = fd_ulong_align_up( total_wksp_footprint, page_sz );
659 :
660 : /* Give any leftover space in the underlying shared memory to the
661 : data region of the workspace, since we might as well use it. */
662 0 : wksp->part_max = part_max;
663 0 : wksp->known_footprint = footprint;
664 0 : wksp->total_footprint = wksp_aligned_footprint - fd_ulong_align_up( fd_wksp_private_data_off( part_max ), fd_topo_workspace_align() );
665 0 : wksp->page_sz = page_sz;
666 0 : wksp->page_cnt = wksp_aligned_footprint / page_sz;
667 0 : }
668 :
669 0 : initialize_numa_assignments( topo );
670 :
671 0 : validate( topo );
672 0 : }
|