Line data Source code
1 : #include "fd_cnc.h"
2 :
3 : ulong
4 63 : fd_cnc_align( void ) {
5 63 : return FD_CNC_ALIGN;
6 63 : }
7 :
8 : ulong
9 123 : fd_cnc_footprint( ulong app_sz ) {
10 123 : if( FD_UNLIKELY( app_sz > (ULONG_MAX-191UL) ) ) return 0UL; /* overflow */
11 117 : return FD_CNC_FOOTPRINT( app_sz );
12 123 : }
13 :
14 : void *
15 : fd_cnc_new( void * shmem,
16 : ulong app_sz,
17 : ulong type,
18 57 : long now ) {
19 57 : fd_cnc_t * cnc = (fd_cnc_t *)shmem;
20 :
21 57 : if( FD_UNLIKELY( !shmem ) ) {
22 0 : FD_LOG_WARNING(( "NULL shmem" ));
23 0 : return NULL;
24 0 : }
25 :
26 57 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_cnc_align() ) ) ) {
27 0 : FD_LOG_WARNING(( "misaligned shmem" ));
28 0 : return NULL;
29 0 : }
30 :
31 57 : ulong footprint = fd_cnc_footprint( app_sz );
32 57 : if( FD_UNLIKELY( !footprint ) ) {
33 0 : FD_LOG_WARNING(( "bad app_sz (%lu)", app_sz ));
34 0 : return NULL;
35 0 : }
36 :
37 57 : fd_memset( cnc, 0, footprint );
38 :
39 57 : cnc->app_sz = app_sz;
40 57 : cnc->type = type;
41 57 : cnc->heartbeat0 = now;
42 57 : cnc->heartbeat = now;
43 57 : cnc->lock = 0UL;
44 57 : cnc->signal = FD_CNC_SIGNAL_BOOT;
45 :
46 57 : FD_COMPILER_MFENCE();
47 57 : FD_VOLATILE( cnc->magic ) = FD_CNC_MAGIC;
48 57 : FD_COMPILER_MFENCE();
49 :
50 57 : return (void *)cnc;
51 57 : }
52 :
53 : fd_cnc_t *
54 216 : fd_cnc_join( void * shcnc ) {
55 :
56 216 : if( FD_UNLIKELY( !shcnc ) ) {
57 0 : FD_LOG_WARNING(( "NULL shcnc" ));
58 0 : return NULL;
59 0 : }
60 :
61 216 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shcnc, fd_cnc_align() ) ) ) {
62 0 : FD_LOG_WARNING(( "misaligned shcnc" ));
63 0 : return NULL;
64 0 : }
65 :
66 216 : fd_cnc_t * cnc = (fd_cnc_t *)shcnc;
67 :
68 216 : if( FD_UNLIKELY( cnc->magic!=FD_CNC_MAGIC ) ) {
69 0 : FD_LOG_WARNING(( "bad magic" ));
70 0 : return NULL;
71 0 : }
72 :
73 216 : return cnc;
74 216 : }
75 :
76 : void *
77 216 : fd_cnc_leave( fd_cnc_t const * cnc ) {
78 :
79 216 : if( FD_UNLIKELY( !cnc ) ) {
80 0 : FD_LOG_WARNING(( "NULL cnc" ));
81 0 : return NULL;
82 0 : }
83 :
84 216 : return (void *)cnc; /* Kinda ugly const cast */
85 216 : }
86 :
87 : void *
88 9 : fd_cnc_delete( void * shcnc ) {
89 :
90 9 : if( FD_UNLIKELY( !shcnc ) ) {
91 0 : FD_LOG_WARNING(( "NULL shcnc" ));
92 0 : return NULL;
93 0 : }
94 :
95 9 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shcnc, fd_cnc_align() ) ) ) {
96 0 : FD_LOG_WARNING(( "misaligned shcnc" ));
97 0 : return NULL;
98 0 : }
99 :
100 9 : fd_cnc_t * cnc = (fd_cnc_t *)shcnc;
101 :
102 9 : if( FD_UNLIKELY( cnc->magic!=FD_CNC_MAGIC ) ) {
103 3 : FD_LOG_WARNING(( "bad magic" ));
104 3 : return NULL;
105 3 : }
106 :
107 6 : FD_COMPILER_MFENCE();
108 6 : FD_VOLATILE( cnc->magic ) = 0UL;
109 6 : FD_COMPILER_MFENCE();
110 :
111 6 : return (void *)cnc;
112 9 : }
113 :
114 : #if FD_HAS_HOSTED && FD_HAS_ATOMIC
115 :
116 : #include <errno.h>
117 : #include <signal.h>
118 : #include <sched.h>
119 :
120 : int
121 105 : fd_cnc_open( fd_cnc_t * cnc ) {
122 :
123 : /* Check input args */
124 :
125 105 : if( FD_UNLIKELY( !cnc ) ) {
126 0 : FD_LOG_WARNING(( "NULL cnc" ));
127 0 : return FD_CNC_ERR_INVAL;
128 0 : }
129 :
130 105 : ulong my_pid = fd_log_group_id();
131 105 : if( FD_UNLIKELY( (!my_pid) | (my_pid!=(ulong)(pid_t)my_pid) ) ) {
132 0 : FD_LOG_WARNING(( "unexpected pid (%lu)", my_pid ));
133 0 : return FD_CNC_ERR_UNSUP;
134 0 : }
135 :
136 : /* Try to acquire a lock on the cnc */
137 :
138 105 : FD_COMPILER_MFENCE();
139 105 : ulong cnc_pid = FD_ATOMIC_CAS( &cnc->lock, 0UL, my_pid );
140 105 : FD_COMPILER_MFENCE();
141 :
142 105 : if( FD_LIKELY( !cnc_pid ) ) {
143 :
144 : /* Got the lock ... get the status of the app thread. */
145 :
146 105 : ulong signal = fd_cnc_signal_query( cnc );
147 :
148 : /* If the app thread was in the run state, return success. */
149 :
150 105 : if( FD_LIKELY( signal==FD_CNC_SIGNAL_RUN ) ) return FD_CNC_SUCCESS;
151 :
152 : /* At this point, since RUN was not observed, we can't safely issue
153 : signals to the app thread. So we unlock the lock. If FAIL was
154 : observed, we know that this thread is permanently dead and we
155 : hard fail the open request. If BOOT, HALT or USER defined, we
156 : can't guarantee that we will never be able to open up a command
157 : session, so we tell the user to try again later. */
158 :
159 0 : FD_COMPILER_MFENCE();
160 0 : FD_VOLATILE( cnc->lock ) = 0UL;
161 0 : FD_COMPILER_MFENCE();
162 :
163 0 : if( FD_LIKELY( signal==FD_CNC_SIGNAL_FAIL ) ) {
164 0 : FD_LOG_WARNING(( "app thread failed; unable to open command session" ));
165 0 : return FD_CNC_ERR_FAIL;
166 0 : }
167 :
168 0 : char buf[ FD_CNC_SIGNAL_CSTR_BUF_MAX ];
169 0 : FD_LOG_WARNING(( "signal %s (%lu) in progress on app thread; try again later?", fd_cnc_signal_cstr( signal, buf ), signal ));
170 0 : return FD_CNC_ERR_AGAIN;
171 0 : }
172 :
173 : /* Somebody else seems to have an open command session on the app
174 : thread. Check that the somebody else is alive. */
175 :
176 0 : if( FD_UNLIKELY( cnc_pid!=my_pid && kill( (pid_t)cnc_pid, 0 ) ) ) {
177 :
178 0 : int err = errno;
179 0 : if( FD_LIKELY( err==ESRCH ) ) {
180 :
181 : /* A process died with an open command session. Try to clean up
182 : after it and resume. */
183 :
184 0 : if( FD_LIKELY( FD_ATOMIC_CAS( &cnc->lock, cnc_pid, my_pid )==cnc_pid ) ) {
185 :
186 : /* We successfully reclaimed the lock from the dead process. If
187 : there is a pending signal from it still being processed by
188 : the app thread (e.g. HALT or USER defined), wait briefly for
189 : it complete and then decide how best to proceed. (Note: this
190 : assumes no pid reuse between the kill above cas.) */
191 :
192 0 : ulong signal = fd_cnc_signal_query( cnc );
193 :
194 0 : if( FD_UNLIKELY( !( (signal==FD_CNC_SIGNAL_BOOT) | (signal==FD_CNC_SIGNAL_RUN ) | (signal==FD_CNC_SIGNAL_FAIL) ) ) )
195 0 : signal = fd_cnc_wait( cnc, signal, (ulong)100e6, NULL ); /* 100 ms */
196 :
197 0 : if( FD_LIKELY( signal==FD_CNC_SIGNAL_RUN ) ) {
198 :
199 : /* App thread seem to be running and we have the lock. Looks
200 : like we can recover. */
201 :
202 0 : FD_LOG_WARNING(( "pid %lu died with an open command session; attempting to recover", cnc_pid ));
203 0 : return FD_CNC_SUCCESS;
204 0 : }
205 :
206 0 : if( FD_LIKELY( signal==FD_CNC_SIGNAL_BOOT ) ) {
207 :
208 : /* Last signal apparently stopped the app thread and left it
209 : in a state where it can be booted again safely. Unlock the
210 : session lock to end the stale command session (so that the
211 : thread can be booted again) and fail this open request with
212 : try again later as this open might succeed in the future
213 : (i.e. after the thread is booted in the run state again). */
214 :
215 0 : FD_COMPILER_MFENCE();
216 0 : FD_VOLATILE( cnc->lock ) = 0UL;
217 0 : FD_COMPILER_MFENCE();
218 :
219 0 : FD_LOG_WARNING(( "pid %lu died with an open command session that cleanly halted the app thread; try again later?",
220 0 : cnc_pid ));
221 0 : return FD_CNC_ERR_AGAIN;
222 0 : }
223 :
224 0 : if( FD_LIKELY( signal==FD_CNC_SIGNAL_FAIL ) ) {
225 :
226 : /* Last signal apparently stopped the app thread and left it
227 : in a state where it cannot be booted again safely. Unlock
228 : the session lock to end the stale command session (so that
229 : the app thread can be cleaned up) and fail this open
230 : request. */
231 :
232 0 : FD_COMPILER_MFENCE();
233 0 : FD_VOLATILE( cnc->lock ) = 0UL;
234 0 : FD_COMPILER_MFENCE();
235 :
236 0 : FD_LOG_WARNING(( "pid %lu died with an open command session that uncleanly halted the app thread", cnc_pid ));
237 0 : return FD_CNC_ERR_FAIL;
238 0 : }
239 :
240 : /* App thread seems to be still processing a HALT or USER
241 : defined signal. Restore the lock to the dead pid and tell
242 : the user to try again later (when we might know better how to
243 : recover). */
244 :
245 0 : FD_COMPILER_MFENCE();
246 0 : FD_VOLATILE( cnc->lock ) = cnc_pid;
247 0 : FD_COMPILER_MFENCE();
248 :
249 0 : FD_LOG_WARNING(( "pid %lu died with an open command session and last signal issued (%lu) still seems to be pending; "
250 0 : "try again later?", cnc_pid, signal ));
251 0 : return FD_CNC_ERR_AGAIN;
252 0 : }
253 :
254 : /* Another thread reclaimed the lock before we could. Presumably
255 : that thread will recover the lock so we tell the user to try
256 : again later. */
257 :
258 0 : FD_LOG_WARNING(( "pid %lu died with an open command session and another thread is trying to clean it up; try again later?",
259 0 : cnc_pid ));
260 0 : return FD_CNC_ERR_AGAIN;
261 0 : }
262 :
263 : /* There is an open command session but we can't tell if the pid
264 : running it is live. Assume it is and tell the user to try again
265 : later. */
266 :
267 0 : FD_LOG_WARNING(( "pid %lu currently command session and unable to diagnose pid's state (%i-%s); try again later?",
268 0 : cnc_pid, err, fd_io_strerror( err ) ));
269 0 : return FD_CNC_ERR_AGAIN;
270 0 : }
271 :
272 : /* There is already an open command session from a seemingly live
273 : process */
274 :
275 0 : FD_LOG_WARNING(( "pid %lu currently has an open command session; try again later?", cnc_pid ));
276 0 : return FD_CNC_ERR_AGAIN;
277 0 : }
278 :
279 : #else
280 :
281 : int
282 : fd_cnc_open( fd_cnc_t * cnc ) {
283 : (void)cnc;
284 : FD_LOG_WARNING(( "unsupported for this build target" ));
285 : return FD_CNC_ERR_UNSUP;
286 : }
287 :
288 : #endif
289 :
290 : ulong
291 : fd_cnc_wait( fd_cnc_t const * cnc,
292 : ulong test,
293 : long dt,
294 492 : long * _opt_now ) {
295 492 : long then = fd_log_wallclock();
296 492 : long now = then;
297 :
298 492 : ulong obs;
299 7236 : for(;;) {
300 7236 : obs = fd_cnc_signal_query( cnc );
301 7236 : int done = ((obs!=test) | ((now-then)>dt));
302 7236 : FD_COMPILER_FORGET( done ); /* avoid compiler misoptimization */
303 7236 : if( FD_LIKELY( done ) ) break; /* optimize for exit, single exit to optimize spin pause hinting */
304 6744 : FD_YIELD();
305 6744 : now = fd_log_wallclock();
306 6744 : }
307 :
308 492 : if( _opt_now ) *_opt_now = now; /* usage dep prob */
309 492 : return obs;
310 492 : }
311 :
312 : char const *
313 18 : fd_cnc_strerror( int err ) {
314 18 : switch( err ) {
315 3 : case FD_CNC_SUCCESS: return "success";
316 3 : case FD_CNC_ERR_UNSUP: return "unsupported here";
317 3 : case FD_CNC_ERR_INVAL: return "bad inputs";
318 3 : case FD_CNC_ERR_AGAIN: return "try again later";
319 3 : case FD_CNC_ERR_FAIL: return "app thread failed";
320 3 : default: break;
321 18 : }
322 3 : return "unknown---possibly not a cnc error code";
323 18 : }
324 :
325 : ulong
326 120 : fd_cstr_to_cnc_signal( char const * cstr ) {
327 120 : if( FD_UNLIKELY( !cstr ) ) return FD_CNC_SIGNAL_RUN;
328 120 : if( !fd_cstr_casecmp( cstr, "run" ) ) return FD_CNC_SIGNAL_RUN;
329 117 : if( !fd_cstr_casecmp( cstr, "boot" ) ) return FD_CNC_SIGNAL_BOOT;
330 114 : if( !fd_cstr_casecmp( cstr, "fail" ) ) return FD_CNC_SIGNAL_FAIL;
331 111 : if( !fd_cstr_casecmp( cstr, "halt" ) ) return FD_CNC_SIGNAL_HALT;
332 3 : return fd_cstr_to_ulong( cstr );
333 111 : }
334 :
335 : char *
336 : fd_cnc_signal_cstr( ulong signal,
337 222 : char * buf ) {
338 222 : if( FD_LIKELY( buf ) ) {
339 222 : switch( signal ) {
340 3 : case FD_CNC_SIGNAL_RUN: strcpy( buf, "run" ); break;
341 108 : case FD_CNC_SIGNAL_BOOT: strcpy( buf, "boot" ); break;
342 3 : case FD_CNC_SIGNAL_FAIL: strcpy( buf, "fail" ); break;
343 105 : case FD_CNC_SIGNAL_HALT: strcpy( buf, "halt" ); break;
344 3 : default: fd_cstr_printf( buf, FD_CNC_SIGNAL_CSTR_BUF_MAX, NULL, "%lu", signal ); break;
345 222 : }
346 222 : }
347 222 : return buf;
348 222 : }
|