Line data Source code
1 : #include "fd_accdb_sync.h"
2 :
3 : fd_accdb_user_t *
4 : fd_accdb_user_join( fd_accdb_user_t * ljoin,
5 57 : void * shfunk ) {
6 57 : if( FD_UNLIKELY( !ljoin ) ) {
7 0 : FD_LOG_WARNING(( "NULL ljoin" ));
8 0 : return NULL;
9 0 : }
10 57 : if( FD_UNLIKELY( !shfunk ) ) {
11 0 : FD_LOG_WARNING(( "NULL shfunk" ));
12 0 : return NULL;
13 0 : }
14 :
15 57 : memset( ljoin, 0, sizeof(fd_accdb_user_t) );
16 57 : if( FD_UNLIKELY( !fd_funk_join( ljoin->funk, shfunk ) ) ) {
17 0 : FD_LOG_CRIT(( "fd_funk_join failed" ));
18 0 : }
19 :
20 57 : return ljoin;
21 57 : }
22 :
23 : void *
24 : fd_accdb_user_leave( fd_accdb_user_t * user,
25 15 : void ** opt_shfunk ) {
26 15 : if( FD_UNLIKELY( !user ) ) FD_LOG_CRIT(( "NULL ljoin" ));
27 :
28 15 : if( FD_UNLIKELY( !fd_funk_leave( user->funk, opt_shfunk ) ) ) FD_LOG_CRIT(( "fd_funk_leave failed" ));
29 :
30 15 : return user;
31 15 : }
32 :
33 : static int
34 : fd_accdb_has_xid( fd_accdb_user_t const * accdb,
35 465 : fd_funk_txn_xid_t const * rec_xid ) {
36 : /* FIXME unroll this a little */
37 465 : ulong const fork_depth = accdb->fork_depth;
38 471 : for( ulong i=0UL; i<fork_depth; i++ ) {
39 471 : if( fd_funk_txn_xid_eq( &accdb->fork[i], rec_xid ) ) return 1;
40 471 : }
41 0 : return 0;
42 465 : }
43 :
44 : static int
45 : fd_accdb_search_chain( fd_accdb_user_t const * accdb,
46 : ulong chain_idx,
47 : fd_funk_rec_key_t const * key,
48 507 : fd_funk_rec_t ** out_rec ) {
49 507 : *out_rec = NULL;
50 :
51 507 : fd_funk_rec_map_shmem_t const * shmap = accdb->funk->rec_map->map;
52 507 : fd_funk_rec_map_shmem_private_chain_t const * chain_tbl = fd_funk_rec_map_shmem_private_chain_const( shmap, 0UL );
53 507 : fd_funk_rec_map_shmem_private_chain_t const * chain = chain_tbl + chain_idx;
54 507 : fd_funk_rec_t * rec_tbl = accdb->funk->rec_pool->ele;
55 507 : ulong rec_max = fd_funk_rec_pool_ele_max( accdb->funk->rec_pool );
56 507 : ulong ver_cnt = FD_VOLATILE_CONST( chain->ver_cnt );
57 :
58 : /* Start a speculative transaction for the chain containing revisions
59 : of the account key we are looking for. */
60 507 : ulong cnt = fd_funk_rec_map_private_vcnt_cnt( ver_cnt );
61 507 : if( FD_UNLIKELY( fd_funk_rec_map_private_vcnt_ver( ver_cnt )&1 ) ) {
62 0 : return FD_MAP_ERR_AGAIN; /* chain is locked */
63 0 : }
64 507 : FD_COMPILER_MFENCE();
65 507 : uint ele_idx = chain->head_cidx;
66 :
67 : /* Walk the map chain, bail at the first entry
68 : (Each chain is sorted newest-to-oldest) */
69 507 : fd_funk_rec_t * best = NULL;
70 507 : for( ulong i=0UL; i<cnt; i++, ele_idx=rec_tbl[ ele_idx ].map_next ) {
71 465 : fd_funk_rec_t * rec = &rec_tbl[ ele_idx ];
72 :
73 : /* Skip over unrelated records (hash collision) */
74 465 : if( FD_UNLIKELY( !fd_funk_rec_key_eq( rec->pair.key, key ) ) ) continue;
75 :
76 : /* Confirm that record is part of the current fork
77 : FIXME this has bad performance / pointer-chasing */
78 465 : if( FD_UNLIKELY( !fd_accdb_has_xid( accdb, rec->pair.xid ) ) ) continue;
79 :
80 465 : if( FD_UNLIKELY( rec->map_next==ele_idx ) ) {
81 0 : FD_LOG_CRIT(( "fd_accdb_search_chain detected cycle" ));
82 0 : }
83 465 : if( rec->map_next > rec_max ) {
84 465 : if( FD_UNLIKELY( !fd_funk_rec_map_private_idx_is_null( rec->map_next ) ) ) {
85 0 : FD_LOG_CRIT(( "fd_accdb_search_chain detected memory corruption: rec->map_next %u is out of bounds (rec_max %lu)",
86 0 : rec->map_next, rec_max ));
87 0 : }
88 465 : }
89 465 : best = rec;
90 465 : break;
91 465 : }
92 :
93 : /* Retry if we were overrun */
94 507 : if( FD_UNLIKELY( FD_VOLATILE_CONST( chain->ver_cnt )!=ver_cnt ) ) {
95 0 : return FD_MAP_ERR_AGAIN;
96 0 : }
97 :
98 507 : *out_rec = best;
99 507 : return FD_MAP_SUCCESS;
100 507 : }
101 :
102 : static void
103 : fd_accdb_load_fork_slow( fd_accdb_user_t * accdb,
104 45 : fd_funk_txn_xid_t const * xid ) {
105 45 : fd_funk_txn_xid_t next_xid = *xid;
106 :
107 : /* Walk transaction graph, recovering from overruns on-the-fly */
108 45 : accdb->fork_depth = 0UL;
109 :
110 45 : ulong txn_max = fd_funk_txn_pool_ele_max( accdb->funk->txn_pool );
111 45 : ulong i;
112 48 : for( i=0UL; i<FD_ACCDB_DEPTH_MAX; i++ ) {
113 48 : fd_funk_txn_map_query_t query[1];
114 48 : fd_funk_txn_t const * candidate;
115 48 : fd_funk_txn_xid_t found_xid;
116 48 : ulong parent_idx;
117 48 : fd_funk_txn_xid_t parent_xid;
118 48 : retry:
119 : /* Speculatively look up transaction from map */
120 48 : for(;;) {
121 48 : int query_err = fd_funk_txn_map_query_try( accdb->funk->txn_map, &next_xid, NULL, query, 0 );
122 48 : if( FD_UNLIKELY( query_err==FD_MAP_ERR_AGAIN ) ) {
123 : /* FIXME random backoff */
124 0 : FD_SPIN_PAUSE();
125 0 : continue;
126 0 : }
127 48 : if( query_err==FD_MAP_ERR_KEY ) goto done;
128 48 : if( FD_UNLIKELY( query_err!=FD_MAP_SUCCESS ) ) {
129 0 : FD_LOG_CRIT(( "fd_funk_txn_map_query_try failed: %i-%s", query_err, fd_map_strerror( query_err ) ));
130 0 : }
131 48 : break;
132 48 : }
133 :
134 : /* Lookup parent transaction while recovering from overruns
135 : FIXME This would be a lot easier if transactions specified
136 : parent by XID instead of by pointer ... */
137 48 : candidate = fd_funk_txn_map_query_ele_const( query );
138 48 : FD_COMPILER_MFENCE();
139 48 : do {
140 48 : found_xid = FD_VOLATILE_CONST( candidate->xid );
141 48 : parent_idx = fd_funk_txn_idx( FD_VOLATILE_CONST( candidate->parent_cidx ) );
142 48 : if( parent_idx<txn_max ) {
143 3 : FD_COMPILER_MFENCE();
144 3 : fd_funk_txn_t const * parent = &accdb->funk->txn_pool->ele[ parent_idx ];
145 3 : parent_xid = FD_VOLATILE_CONST( parent->xid );
146 3 : FD_COMPILER_MFENCE();
147 3 : }
148 48 : parent_idx = fd_funk_txn_idx( FD_VOLATILE_CONST( candidate->parent_cidx ) );
149 48 : } while(0);
150 48 : FD_COMPILER_MFENCE();
151 :
152 : /* Verify speculative loads by ensuring txn still exists in map */
153 48 : if( FD_UNLIKELY( fd_funk_txn_map_query_test( query )!=FD_MAP_SUCCESS ) ) {
154 0 : FD_SPIN_PAUSE();
155 0 : goto retry;
156 0 : }
157 :
158 48 : if( FD_UNLIKELY( !fd_funk_txn_xid_eq( &found_xid, &next_xid ) ) ) {
159 0 : FD_LOG_CRIT(( "fd_accdb_load_fork_slow detected memory corruption: expected xid %lu:%lu at %p, found %lu:%lu",
160 0 : next_xid.ul[0], next_xid.ul[1],
161 0 : (void *)candidate,
162 0 : found_xid.ul[0], found_xid.ul[1] ));
163 0 : }
164 :
165 48 : accdb->fork[ i ] = next_xid;
166 48 : if( fd_funk_txn_idx_is_null( parent_idx ) ) {
167 : /* Reached root */
168 45 : i++;
169 45 : break;
170 45 : }
171 3 : next_xid = parent_xid;
172 3 : }
173 :
174 45 : done:
175 45 : accdb->fork_depth = i;
176 :
177 : /* FIXME crash if fork depth greater than cache depth */
178 45 : if( accdb->fork_depth < FD_ACCDB_DEPTH_MAX ) {
179 45 : fd_funk_txn_xid_set_root( &accdb->fork[ accdb->fork_depth++ ] );
180 45 : }
181 :
182 : /* Remember tip fork */
183 45 : fd_funk_txn_t * tip = fd_funk_txn_query( xid, accdb->funk->txn_map );
184 45 : ulong tip_idx = tip ? (ulong)( tip-accdb->funk->txn_pool->ele ) : ULONG_MAX;
185 45 : accdb->tip_txn_idx = tip_idx;
186 45 : if( tip ) fd_funk_txn_state_assert( tip, FD_FUNK_TXN_STATE_ACTIVE );
187 45 : }
188 :
189 : static inline void
190 : fd_accdb_load_fork( fd_accdb_user_t * accdb,
191 507 : fd_funk_txn_xid_t const * xid ) {
192 : /* Skip if already on the correct fork */
193 507 : if( FD_LIKELY( (!!accdb->fork_depth) & (!!fd_funk_txn_xid_eq( &accdb->fork[ 0 ], xid ) ) ) ) return;
194 45 : if( FD_UNLIKELY( accdb->rw_active ) ) {
195 0 : FD_LOG_CRIT(( "Invariant violation: all active account references of an accdb_user must be accessed through the same XID (active XID %lu:%lu, requested XID %lu:%lu)",
196 0 : accdb->fork[0].ul[0], accdb->fork[0].ul[1],
197 0 : xid ->ul[0], xid ->ul[1] ));
198 0 : }
199 45 : fd_accdb_load_fork_slow( accdb, xid ); /* switch fork */
200 45 : }
201 :
202 : static fd_accdb_peek_t *
203 : fd_accdb_peek1( fd_accdb_user_t * accdb,
204 : fd_accdb_peek_t * peek,
205 : fd_funk_txn_xid_t const * xid,
206 507 : void const * address ) {
207 507 : fd_funk_t const * funk = accdb->funk;
208 507 : fd_funk_rec_key_t key[1]; memcpy( key->uc, address, 32UL );
209 :
210 : /* Hash key to chain */
211 507 : fd_funk_xid_key_pair_t pair[1];
212 507 : fd_funk_txn_xid_copy( pair->xid, xid );
213 507 : fd_funk_rec_key_copy( pair->key, key );
214 507 : fd_funk_rec_map_t const * rec_map = funk->rec_map;
215 507 : ulong hash = fd_funk_rec_map_key_hash( pair, rec_map->map->seed );
216 507 : ulong chain_idx = (hash & (rec_map->map->chain_cnt-1UL) );
217 :
218 : /* Traverse chain for candidate */
219 507 : fd_funk_rec_t * rec = NULL;
220 507 : for(;;) {
221 507 : int err = fd_accdb_search_chain( accdb, chain_idx, key, &rec );
222 507 : if( FD_LIKELY( err==FD_MAP_SUCCESS ) ) break;
223 0 : FD_SPIN_PAUSE();
224 : /* FIXME backoff */
225 0 : }
226 507 : if( !rec ) return NULL;
227 :
228 465 : *peek = (fd_accdb_peek_t) {
229 465 : .acc = {{
230 465 : .rec = rec,
231 465 : .meta = fd_funk_val( rec, funk->wksp )
232 465 : }},
233 465 : .spec = {{
234 465 : .key = *key,
235 465 : .keyp = rec->pair.key
236 465 : }}
237 465 : };
238 465 : return peek;
239 507 : }
240 :
241 : fd_accdb_peek_t *
242 : fd_accdb_peek( fd_accdb_user_t * accdb,
243 : fd_accdb_peek_t * peek,
244 : fd_funk_txn_xid_t const * xid,
245 0 : void const * address ) {
246 0 : if( FD_UNLIKELY( !accdb || !accdb->funk->shmem ) ) FD_LOG_CRIT(( "NULL accdb" ));
247 0 : fd_accdb_load_fork( accdb, xid );
248 0 : return fd_accdb_peek1( accdb, peek, xid, address );
249 0 : }
250 :
251 : static void
252 : fd_accdb_copy_account( fd_account_meta_t * out_meta,
253 : void * out_data,
254 3 : fd_accdb_ro_t const * acc ) {
255 3 : memset( out_meta, 0, sizeof(fd_account_meta_t) );
256 3 : out_meta->lamports = fd_accdb_ref_lamports( acc );
257 3 : if( FD_LIKELY( out_meta->lamports ) ) {
258 3 : memcpy( out_meta->owner, fd_accdb_ref_owner( acc ), 32UL );
259 3 : out_meta->executable = !!fd_accdb_ref_exec_bit( acc );
260 3 : out_meta->dlen = (uint)fd_accdb_ref_data_sz( acc );
261 3 : fd_memcpy( out_data, fd_accdb_ref_data_const( acc ), out_meta->dlen );
262 3 : }
263 3 : }
264 :
265 : /* fd_accdb_prep_create preps a writable handle for a newly created
266 : account. */
267 :
268 : static fd_accdb_rw_t *
269 : fd_accdb_prep_create( fd_accdb_rw_t * rw,
270 : fd_accdb_user_t * accdb,
271 : fd_funk_txn_xid_t const * xid,
272 : void const * address,
273 : void * val,
274 : ulong val_sz,
275 45 : ulong val_max ) {
276 45 : fd_funk_rec_t * rec = fd_funk_rec_pool_acquire( accdb->funk->rec_pool, NULL, 1, NULL );
277 45 : if( FD_UNLIKELY( !rec ) ) FD_LOG_CRIT(( "Failed to modify account: DB record pool is out of memory" ));
278 :
279 45 : memset( rec, 0, sizeof(fd_funk_rec_t) );
280 45 : rec->val_gaddr = fd_wksp_gaddr_fast( accdb->funk->wksp, val );
281 45 : rec->val_sz = (uint)( fd_ulong_min( val_sz, FD_FUNK_REC_VAL_MAX ) & FD_FUNK_REC_VAL_MAX );
282 45 : rec->val_max = (uint)( fd_ulong_min( val_max, FD_FUNK_REC_VAL_MAX ) & FD_FUNK_REC_VAL_MAX );
283 45 : memcpy( rec->pair.key->uc, address, 32UL );
284 45 : fd_funk_txn_xid_copy( rec->pair.xid, xid );
285 45 : rec->tag = 0;
286 45 : rec->prev_idx = FD_FUNK_REC_IDX_NULL;
287 45 : rec->next_idx = FD_FUNK_REC_IDX_NULL;
288 :
289 45 : fd_account_meta_t * meta = val;
290 45 : meta->slot = xid->ul[0];
291 :
292 45 : accdb->rw_active++;
293 45 : *rw = (fd_accdb_rw_t) {
294 45 : .rec = rec,
295 45 : .meta = meta,
296 45 : .published = 0
297 45 : };
298 45 : return rw;
299 45 : }
300 :
301 : /* fd_accdb_prep_inplace preps a writable handle for a mutable record. */
302 :
303 : static fd_accdb_rw_t *
304 : fd_accdb_prep_inplace( fd_accdb_rw_t * rw,
305 : fd_accdb_user_t * accdb,
306 462 : fd_funk_rec_t * rec ) {
307 : /* Take the opportunity to run some validation checks */
308 462 : if( FD_UNLIKELY( !rec->val_gaddr ) ) {
309 0 : FD_LOG_CRIT(( "Failed to prepare in-place account write: rec %p is not allocated", (void *)rec ));
310 0 : }
311 :
312 462 : accdb->rw_active++;
313 462 : *rw = (fd_accdb_rw_t) {
314 462 : .rec = rec,
315 462 : .meta = fd_funk_val( rec, accdb->funk->wksp ),
316 462 : .published = 1
317 462 : };
318 462 : if( FD_UNLIKELY( !rw->meta->lamports ) ) {
319 9 : memset( rw->meta, 0, sizeof(fd_account_meta_t) );
320 9 : }
321 462 : return rw;
322 462 : }
323 :
324 : fd_accdb_rw_t *
325 : fd_accdb_modify_prepare( fd_accdb_user_t * accdb,
326 : fd_accdb_rw_t * rw,
327 : fd_funk_txn_xid_t const * xid,
328 : void const * address,
329 : ulong const data_min,
330 507 : int do_create ) {
331 : /* Pivot to different fork */
332 :
333 507 : fd_accdb_load_fork( accdb, xid );
334 507 : ulong txn_idx = accdb->tip_txn_idx;
335 507 : if( FD_UNLIKELY( txn_idx==ULONG_MAX ) ) {
336 0 : FD_LOG_CRIT(( "fd_accdb_modify_prepare failed: XID %lu:%lu is rooted", xid->ul[0], xid->ul[1] ));
337 0 : }
338 507 : if( FD_UNLIKELY( txn_idx >= fd_funk_txn_pool_ele_max( accdb->funk->txn_pool ) ) ) {
339 0 : FD_LOG_CRIT(( "memory corruption detected: invalid txn_idx %lu (max %lu)",
340 0 : txn_idx, fd_funk_txn_pool_ele_max( accdb->funk->txn_pool ) ));
341 0 : }
342 507 : fd_funk_txn_t * txn = &accdb->funk->txn_pool->ele[ txn_idx ];
343 507 : if( FD_UNLIKELY( !fd_funk_txn_xid_eq( &txn->xid, xid ) ) ) {
344 0 : FD_LOG_CRIT(( "Failed to modify account: data race detected on fork node (expected XID %lu:%lu, found %lu:%lu)",
345 0 : xid->ul[0], xid->ul[1],
346 0 : txn->xid.ul[0], txn->xid.ul[1] ));
347 0 : }
348 507 : if( FD_UNLIKELY( fd_funk_txn_is_frozen( txn ) ) ) {
349 0 : FD_LOG_CRIT(( "Failed to modify account: XID %lu:%lu has children/is frozen", xid->ul[0], xid->ul[1] ));
350 0 : }
351 :
352 : /* Query old record value */
353 :
354 507 : fd_accdb_peek_t peek[1];
355 507 : if( FD_UNLIKELY( !fd_accdb_peek1( accdb, peek, xid, address ) ) ) {
356 :
357 : /* Record not found */
358 42 : if( !do_create ) return NULL;
359 42 : ulong val_sz_min = sizeof(fd_account_meta_t)+data_min;
360 42 : ulong val_sz = data_min;
361 42 : ulong val_max = 0UL;
362 42 : void * val = fd_alloc_malloc_at_least( accdb->funk->alloc, 16UL, val_sz_min, &val_max );
363 42 : if( FD_UNLIKELY( !val ) ) {
364 0 : FD_LOG_CRIT(( "Failed to modify account: out of memory allocating %lu bytes", data_min ));
365 0 : }
366 42 : fd_memset( val, 0, val_sz_min );
367 42 : return fd_accdb_prep_create( rw, accdb, xid, address, val, val_sz, val_max );
368 :
369 465 : } else if( fd_funk_txn_xid_eq( peek->acc->rec->pair.xid, xid ) ) {
370 :
371 : /* Mutable record found, modify in-place */
372 462 : fd_funk_rec_t * rec = (void *)( peek->acc->ref->rec_laddr );
373 462 : ulong acc_orig_sz = fd_accdb_ref_data_sz( peek->acc );
374 462 : ulong val_sz_min = sizeof(fd_account_meta_t)+fd_ulong_max( data_min, acc_orig_sz );
375 462 : void * val = fd_funk_val_truncate( rec, accdb->funk->alloc, accdb->funk->wksp, 16UL, val_sz_min, NULL );
376 462 : if( FD_UNLIKELY( !val ) ) {
377 0 : FD_LOG_CRIT(( "Failed to modify account: out of memory allocating %lu bytes", acc_orig_sz ));
378 0 : }
379 462 : return fd_accdb_prep_inplace( rw, accdb, rec );
380 :
381 462 : } else {
382 :
383 : /* Frozen record found, copy out to new object */
384 3 : ulong acc_orig_sz = fd_accdb_ref_data_sz( peek->acc );
385 3 : ulong val_sz_min = sizeof(fd_account_meta_t)+fd_ulong_max( data_min, acc_orig_sz );
386 3 : ulong val_sz = peek->acc->rec->val_sz;
387 3 : ulong val_max = 0UL;
388 3 : void * val = fd_alloc_malloc_at_least( accdb->funk->alloc, 16UL, val_sz_min, &val_max );
389 3 : if( FD_UNLIKELY( !val ) ) {
390 0 : FD_LOG_CRIT(( "Failed to modify account: out of memory allocating %lu bytes", acc_orig_sz ));
391 0 : }
392 :
393 3 : fd_account_meta_t * meta = val;
394 3 : uchar * data = (uchar *)( meta+1 );
395 3 : ulong data_max = val_max - sizeof(fd_account_meta_t);
396 3 : fd_accdb_copy_account( meta, data, peek->acc );
397 3 : if( acc_orig_sz<data_max ) {
398 : /* Zero out trailing data */
399 3 : uchar * tail = data +acc_orig_sz;
400 3 : ulong tail_sz = data_max-acc_orig_sz;
401 3 : fd_memset( tail, 0, tail_sz );
402 3 : }
403 3 : if( FD_UNLIKELY( !fd_accdb_peek_test( peek ) ) ) {
404 0 : FD_LOG_CRIT(( "Failed to modify account: data race detected, account was removed while being read" ));
405 0 : }
406 :
407 3 : return fd_accdb_prep_create( rw, accdb, xid, address, val, val_sz, val_max );
408 :
409 3 : }
410 507 : }
411 :
412 : void
413 : fd_accdb_write_publish( fd_accdb_user_t * accdb,
414 0 : fd_accdb_rw_t * write ) {
415 0 : if( FD_UNLIKELY( !accdb->rw_active ) ) {
416 0 : FD_LOG_CRIT(( "Failed to modify account: ref count underflow" ));
417 0 : }
418 :
419 0 : if( !write->published ) {
420 0 : if( FD_UNLIKELY( accdb->tip_txn_idx==ULONG_MAX ) ) {
421 0 : FD_LOG_CRIT(( "accdb_user corrupt: not joined to a transaction" ));
422 0 : }
423 0 : fd_funk_txn_t * txn = accdb->funk->txn_pool->ele + accdb->tip_txn_idx;
424 0 : fd_funk_rec_prepare_t prepare = {
425 0 : .rec = write->rec,
426 0 : .rec_head_idx = &txn->rec_head_idx,
427 0 : .rec_tail_idx = &txn->rec_tail_idx
428 0 : };
429 0 : fd_funk_rec_publish( accdb->funk, &prepare );
430 0 : }
431 :
432 0 : accdb->rw_active--;
433 0 : }
|