Line data Source code
1 : #include "../../../../disco/tiles.h"
2 :
3 : #include "generated/cswtch_seccomp.h"
4 :
5 : #include "../../../../disco/metrics/fd_metrics.h"
6 :
7 : #include <unistd.h>
8 : #include <fcntl.h>
9 : #include <errno.h>
10 :
/* REPORT_INTERVAL_MILLIS is the spacing, in milliseconds, between two
   consecutive samples of the per-tile context switch counters.
   before_credit converts it to nanoseconds when advancing
   next_report_nanos. */
11 0 : #define REPORT_INTERVAL_MILLIS (100L)
12 :
/* fd_cswtch_ctx_t is the state of the cswtch tile.  It is carved out
   of the tile scratch region by the init functions and handed to
   before_credit on every call. */
13 : typedef struct {
/* Wallclock timestamp (as returned by fd_log_wallclock) before which
   before_credit returns without sampling. */
14 : long next_report_nanos;
15 :
/* Number of entries in use in the two arrays below; copied from
   topo->tile_cnt in privileged_init. */
16 : ulong tile_cnt;
/* status_fds[ i ] is the descriptor opened on
   /proc/<pid>/task/<tid>/status for tile index i. */
17 : int status_fds[ FD_TILE_MAX ];
/* metrics[ i ] is the metrics region of tile index i, into which the
   parsed context switch counters are written. */
18 : volatile ulong * metrics[ FD_TILE_MAX ];
19 : } fd_cswtch_ctx_t;
20 :
21 : FD_FN_CONST static inline ulong
22 3 : scratch_align( void ) {
23 3 : return 128UL;
24 3 : }
25 :
26 : FD_FN_PURE static inline ulong
27 3 : scratch_footprint( fd_topo_tile_t const * tile ) {
28 3 : (void)tile;
29 3 : ulong l = FD_LAYOUT_INIT;
30 3 : l = FD_LAYOUT_APPEND( l, alignof( fd_cswtch_ctx_t ), sizeof( fd_cswtch_ctx_t ) );
31 3 : return FD_LAYOUT_FINI( l, scratch_align() );
32 3 : }
33 :
34 : static void
35 : before_credit( fd_cswtch_ctx_t * ctx,
36 : fd_stem_context_t * mux,
37 0 : int * charge_busy ) {
38 0 : (void)mux;
39 :
40 0 : long now = fd_log_wallclock();
41 0 : if( FD_UNLIKELY( now<ctx->next_report_nanos ) ) return;
42 0 : ctx->next_report_nanos += REPORT_INTERVAL_MILLIS*1000L*1000L;
43 :
44 0 : *charge_busy = 1;
45 :
46 0 : for( ulong i=0UL; i<ctx->tile_cnt; i++ ) {
47 0 : if( FD_UNLIKELY( -1==lseek( ctx->status_fds[ i ], 0, SEEK_SET ) ) ) FD_LOG_ERR(( "lseek failed (%i-%s)", errno, strerror( errno ) ));
48 :
49 0 : char contents[ 4096 ] = {0};
50 0 : ulong contents_len = 0UL;
51 :
52 0 : int process_died = 0;
53 0 : while( 1 ) {
54 0 : if( FD_UNLIKELY( contents_len>=sizeof( contents ) ) ) FD_LOG_ERR(( "contents overflow" ));
55 0 : long n = read( ctx->status_fds[ i ], contents + contents_len, sizeof( contents ) - contents_len );
56 0 : if( FD_UNLIKELY( -1==n ) ) {
57 0 : if( FD_UNLIKELY( errno==ESRCH ) ) {
58 0 : process_died = 1;
59 0 : break;
60 0 : }
61 0 : FD_LOG_ERR(( "read failed (%i-%s)", errno, strerror( errno ) ));
62 0 : }
63 0 : if( FD_LIKELY( 0==n ) ) break;
64 0 : contents_len += (ulong)n;
65 0 : }
66 :
67 : /* Supervisor is going to bring the whole process tree down if any
68 : of the target PIDs died, so we can ignore this and wait. */
69 0 : if( FD_UNLIKELY( process_died ) ) {
70 0 : FD_LOG_WARNING(( "cannot get context switch metrics for dead tile idx %lu", i ));
71 0 : continue;
72 0 : }
73 :
74 0 : int found_voluntary = 0;
75 0 : int found_involuntary = 0;
76 :
77 0 : char * line = contents;
78 0 : while( 1 ) {
79 0 : char * next_line = strchr( line, '\n' );
80 0 : if( FD_UNLIKELY( NULL==next_line ) ) break;
81 0 : *next_line = '\0';
82 :
83 0 : char * colon = strchr( line, ':' );
84 0 : if( FD_UNLIKELY( NULL==colon ) ) FD_LOG_ERR(( "no colon in line '%s'", line ));
85 :
86 0 : *colon = '\0';
87 0 : char * key = line;
88 0 : char * value = colon + 1;
89 :
90 0 : while( ' '==*value || '\t'==*value ) value++;
91 :
92 0 : if( FD_LIKELY( !strncmp( key, "voluntary_ctxt_switches", 23UL ) ) ) {
93 0 : char * endptr;
94 0 : ulong voluntary_ctxt_switches = strtoul( value, &endptr, 10 );
95 0 : if( FD_UNLIKELY( *endptr!='\0' || voluntary_ctxt_switches==ULONG_MAX ) ) FD_LOG_ERR(( "strtoul failed" ));
96 0 : ctx->metrics[ i ][ FD_METRICS_COUNTER_TILE_CONTEXT_SWITCH_VOLUNTARY_COUNT_OFF ] = voluntary_ctxt_switches;
97 0 : found_voluntary = 1;
98 0 : } else if( FD_LIKELY( !strncmp( key, "nonvoluntary_ctxt_switches", 26UL ) ) ) {
99 0 : char * endptr;
100 0 : ulong involuntary_ctxt_switches = strtoul( value, &endptr, 10 );
101 0 : if( FD_UNLIKELY( *endptr!='\0' || involuntary_ctxt_switches==ULONG_MAX ) ) FD_LOG_ERR(( "strtoul failed" ));
102 0 : ctx->metrics[ i ][ FD_METRICS_COUNTER_TILE_CONTEXT_SWITCH_INVOLUNTARY_COUNT_OFF ] = involuntary_ctxt_switches;
103 0 : found_involuntary = 1;
104 0 : }
105 :
106 0 : line = next_line + 1;
107 0 : }
108 :
109 0 : if( FD_UNLIKELY( !found_voluntary ) ) FD_LOG_ERR(( "voluntary_ctxt_switches not found" ));
110 0 : if( FD_UNLIKELY( !found_involuntary ) ) FD_LOG_ERR(( "nonvoluntary_ctxt_switches not found" ));
111 0 : }
112 0 : }
113 :
114 : static void
115 : privileged_init( fd_topo_t * topo,
116 0 : fd_topo_tile_t * tile ) {
117 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
118 :
119 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
120 0 : fd_cswtch_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_cswtch_ctx_t ), sizeof( fd_cswtch_ctx_t ) );
121 :
122 0 : FD_TEST( topo->tile_cnt<FD_TILE_MAX );
123 :
124 0 : ctx->tile_cnt = topo->tile_cnt;
125 0 : for( ulong i=0UL; i<topo->tile_cnt; i++ ) {
126 0 : ulong * metrics = fd_metrics_join( fd_topo_obj_laddr( topo, topo->tiles[ i ].metrics_obj_id ) );
127 :
128 0 : for(;;) {
129 0 : ulong pid, tid;
130 0 : if( FD_UNLIKELY( tile->id==i ) ) {
131 0 : pid = fd_sandbox_getpid();
132 0 : tid = fd_sandbox_gettid();
133 0 : } else {
134 0 : pid = fd_metrics_tile( metrics )[ FD_METRICS_GAUGE_TILE_PID_OFF ];
135 0 : tid = fd_metrics_tile( metrics )[ FD_METRICS_GAUGE_TILE_TID_OFF ];
136 0 : if( FD_UNLIKELY( !pid || !tid ) ) {
137 0 : FD_SPIN_PAUSE();
138 0 : continue;
139 0 : }
140 0 : }
141 :
142 0 : char path[ 64 ];
143 0 : FD_TEST( fd_cstr_printf_check( path, sizeof( path ), NULL, "/proc/%lu/task/%lu/status", pid, tid ) );
144 0 : ctx->status_fds[ i ] = open( path, O_RDONLY );
145 0 : ctx->metrics[ i ] = fd_metrics_tile( metrics );
146 0 : if( FD_UNLIKELY( -1==ctx->status_fds[ i ] ) ) FD_LOG_ERR(( "open failed (%i-%s)", errno, strerror( errno ) ));
147 0 : break;
148 0 : }
149 0 : }
150 0 : }
151 :
152 : static void
153 : unprivileged_init( fd_topo_t * topo,
154 0 : fd_topo_tile_t * tile ) {
155 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
156 :
157 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
158 0 : fd_cswtch_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_cswtch_ctx_t ), sizeof( fd_cswtch_ctx_t ) );
159 :
160 0 : ctx->next_report_nanos = fd_log_wallclock();
161 :
162 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
163 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
164 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
165 0 : }
166 :
167 : static ulong
168 : populate_allowed_seccomp( fd_topo_t const * topo,
169 : fd_topo_tile_t const * tile,
170 : ulong out_cnt,
171 0 : struct sock_filter * out ) {
172 0 : (void)topo;
173 0 : (void)tile;
174 :
175 0 : populate_sock_filter_policy_cswtch( out_cnt, out, (uint)fd_log_private_logfile_fd() );
176 0 : return sock_filter_policy_cswtch_instr_cnt;
177 0 : }
178 :
179 : static ulong
180 : populate_allowed_fds( fd_topo_t const * topo,
181 : fd_topo_tile_t const * tile,
182 : ulong out_fds_cnt,
183 0 : int * out_fds ) {
184 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
185 :
186 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
187 0 : fd_cswtch_ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof( fd_cswtch_ctx_t ), sizeof( fd_cswtch_ctx_t ) );
188 :
189 0 : if( FD_UNLIKELY( out_fds_cnt<2UL+ctx->tile_cnt ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
190 :
191 0 : ulong out_cnt = 0UL;
192 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
193 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
194 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
195 0 : for( ulong i=0UL; i<ctx->tile_cnt; i++ )
196 0 : out_fds[ out_cnt++ ] = ctx->status_fds[ i ]; /* /proc/<pid>/task/<tid>/status descriptor */
197 0 : return out_cnt;
198 0 : }
199 :
/* Configuration for the stem template included at the end of this
   group.  All macros are defined before the include so the template
   can pick them up.
   NOTE(review): the exact meaning of STEM_BURST and STEM_LAZY is
   defined by fd_stem.c, which is not visible here -- confirm there. */
200 0 : #define STEM_BURST (1UL)
201 :
202 : /* See explanation in fd_pack */
203 0 : #define STEM_LAZY (128L*3000L)
204 :
/* Type and alignment of the context object; before_credit receives a
   pointer to this type as its first argument. */
205 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_cswtch_ctx_t
206 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_cswtch_ctx_t)
207 :
/* The only callback this tile supplies to the template. */
208 0 : #define STEM_CALLBACK_BEFORE_CREDIT before_credit
209 :
/* Presumably instantiates stem_run (referenced by fd_tile_cswtch) from
   the macros above -- TODO confirm against fd_stem.c. */
210 : #include "../../../../disco/stem/fd_stem.c"
211 :
212 : fd_topo_run_tile_t fd_tile_cswtch = {
213 : .name = "cswtch",
214 : .populate_allowed_seccomp = populate_allowed_seccomp,
215 : .populate_allowed_fds = populate_allowed_fds,
216 : .scratch_align = scratch_align,
217 : .scratch_footprint = scratch_footprint,
218 : .privileged_init = privileged_init,
219 : .unprivileged_init = unprivileged_init,
220 : .run = stem_run,
221 : };
|