Line data Source code
1 : #include "fd_wksp_mon.h" 2 : #include "../../tango/tempo/fd_tempo.h" 3 : #include "../../util/wksp/fd_wksp_private.h" 4 : #include <stddef.h> 5 : 6 : #if FD_HAS_AVX 7 : #include <immintrin.h> 8 : #endif 9 : 10 : fd_wksp_mon_t * 11 : fd_wksp_mon_init( fd_wksp_mon_t * mon, 12 : fd_wksp_t * wksp, 13 : ulong bytes_per_sec, 14 0 : long now ) { 15 0 : fd_memset( mon, 0, sizeof(fd_wksp_mon_t) ); 16 : 17 0 : double tick_per_ns = fd_tempo_tick_per_ns( NULL ); 18 0 : double ticks_per_byte = tick_per_ns * 1e9 / (double)bytes_per_sec; 19 0 : ulong ticks_per_part = (ulong)(ticks_per_byte * (double)FD_WKSP_PRIVATE_PINFO_FOOTPRINT); 20 : 21 : /* Ensure at most 5 full sweeps per second by lowering the effective 22 : rate for small workspaces. min_ticks_per_part is the rate at which 23 : a full sweep takes exactly 200ms. */ 24 : 25 0 : ulong part_max = wksp->part_max; 26 0 : FD_TEST( part_max ); 27 0 : ulong min_ticks_per_part = (ulong)(tick_per_ns * 200e6 / (double)part_max); 28 0 : if( ticks_per_part<min_ticks_per_part ) ticks_per_part = min_ticks_per_part; 29 0 : if( FD_UNLIKELY( !ticks_per_part ) ) ticks_per_part = 1UL; 30 : 31 0 : mon->wksp = wksp; 32 0 : mon->part_max = part_max; 33 0 : mon->ticks_per_part = ticks_per_part; 34 0 : mon->last_tick = now; 35 0 : return mon; 36 0 : } 37 : 38 : void * 39 0 : fd_wksp_mon_fini( fd_wksp_mon_t * mon ) { 40 0 : fd_memset( mon, 0, sizeof(fd_wksp_mon_t) ); 41 0 : return (void *)mon; 42 0 : } 43 : 44 : fd_wksp_mon_t * 45 : fd_wksp_mon_tick( fd_wksp_mon_t * mon, 46 0 : long now ) { 47 : 48 0 : ulong part_max = mon->part_max; 49 : 50 0 : mon->tick_rem += (now - mon->last_tick); 51 0 : mon->last_tick = now; 52 : 53 0 : if( FD_UNLIKELY( !part_max ) ) return mon; 54 : 55 0 : ulong ticks_per_part = mon->ticks_per_part; 56 0 : if( FD_UNLIKELY( mon->tick_rem<(long)ticks_per_part ) ) return mon; 57 : 58 0 : ulong part_budget = (ulong)mon->tick_rem / ticks_per_part; 59 0 : part_budget = fd_ulong_min( part_budget, FD_WKSP_MON_BURST_MAX ); 60 : 61 0 : ulong scan_idx = mon->scan_idx; 62 0 : part_budget = fd_ulong_min( part_budget, part_max - scan_idx ); 63 : 64 0 : mon->tick_rem -= (long)(part_budget * ticks_per_part); 65 : 66 0 : fd_wksp_private_pinfo_t const * pinfo = fd_wksp_private_pinfo_const( mon->wksp ); 67 : 68 0 : ulong acc_free_cnt = mon->acc_free_cnt; 69 0 : ulong acc_free_sz = mon->acc_free_sz; 70 0 : ulong acc_free_max_sz = mon->acc_free_max_sz; 71 0 : ulong acc_used_cnt = mon->acc_used_cnt; 72 0 : ulong acc_used_sz = mon->acc_used_sz; 73 : 74 0 : ulong scan_end = scan_idx + part_budget; 75 : 76 0 : FD_STATIC_ASSERT( offsetof(fd_wksp_private_pinfo_t, gaddr_lo)== 0UL, layout ); 77 0 : FD_STATIC_ASSERT( offsetof(fd_wksp_private_pinfo_t, gaddr_hi)== 8UL, layout ); 78 0 : FD_STATIC_ASSERT( offsetof(fd_wksp_private_pinfo_t, tag )==16UL, layout ); 79 : 80 0 : # if FD_HAS_AVX 81 0 : for( ulong i=scan_idx; i<scan_end; i++ ) { 82 0 : __m256i v = _mm256_stream_load_si256( (__m256i const *)(pinfo + i) ); 83 0 : ulong tmp[4] __attribute__((aligned(32))); 84 0 : _mm256_store_si256( (__m256i *)tmp, v ); 85 0 : ulong gaddr_lo = tmp[0]; 86 0 : ulong gaddr_hi = tmp[1]; 87 0 : ulong part_tag = tmp[2]; 88 0 : if( FD_UNLIKELY( gaddr_hi<=gaddr_lo ) ) continue; 89 0 : ulong part_sz = gaddr_hi - gaddr_lo; 90 0 : if( !part_tag ) { 91 0 : acc_free_cnt++; 92 0 : acc_free_sz += part_sz; 93 0 : if( part_sz>acc_free_max_sz ) acc_free_max_sz = part_sz; 94 0 : } else { 95 0 : acc_used_cnt++; 96 0 : acc_used_sz += part_sz; 97 0 : mon->acc_used_hist[ fd_ulong_find_msb( part_sz ) ]++; 98 0 : } 99 0 : } 100 : # else 101 : for( ulong i=scan_idx; i<scan_end; i++ ) { 102 : FD_COMPILER_MFENCE(); 103 : ulong gaddr_lo = pinfo[ i ].gaddr_lo; 104 : ulong gaddr_hi = pinfo[ i ].gaddr_hi; 105 : ulong part_tag = pinfo[ i ].tag; 106 : FD_COMPILER_MFENCE(); 107 : if( FD_UNLIKELY( gaddr_hi<=gaddr_lo ) ) continue; 108 : ulong part_sz = gaddr_hi - gaddr_lo; 109 : if( !part_tag ) { 110 : acc_free_cnt++; 111 : acc_free_sz += part_sz; 112 : if( part_sz>acc_free_max_sz ) acc_free_max_sz = part_sz; 113 : } else { 114 : acc_used_cnt++; 115 : acc_used_sz += part_sz; 116 : mon->acc_used_hist[ fd_ulong_find_msb( part_sz ) ]++; 117 : } 118 : } 119 : # endif 120 : 121 0 : mon->acc_free_cnt = acc_free_cnt; 122 0 : mon->acc_free_sz = acc_free_sz; 123 0 : mon->acc_free_max_sz = acc_free_max_sz; 124 0 : mon->acc_used_cnt = acc_used_cnt; 125 0 : mon->acc_used_sz = acc_used_sz; 126 : 127 0 : scan_idx = scan_end; 128 0 : if( scan_idx==part_max ) { 129 0 : mon->free_cnt = acc_free_cnt; 130 0 : mon->free_sz = acc_free_sz; 131 0 : mon->free_max_sz = acc_free_max_sz; 132 : 133 : /* Walk log2 histogram to find median used partition size. 134 : Linearly interpolate within the median bucket. */ 135 0 : ulong median_sz = 0UL; 136 0 : if( acc_used_cnt ) { 137 0 : ulong half = acc_used_cnt / 2UL; 138 0 : ulong cum = 0UL; 139 0 : ulong mb = 0UL; 140 0 : for( ulong b=0UL; b<64UL; b++ ) { 141 0 : cum += mon->acc_used_hist[b]; 142 0 : if( cum>half ) { mb = b; break; } 143 0 : } 144 0 : float lo = (float)(1UL<<mb); 145 0 : float hi = (mb<63UL) ? (float)(1UL<<(mb+1UL)) : lo; 146 0 : float below = (float)(cum - mon->acc_used_hist[mb]); 147 0 : float bucket_cnt = (float)mon->acc_used_hist[mb]; 148 0 : float rank = (float)half - below; 149 0 : median_sz = (ulong)(lo + (hi - lo) * rank / bucket_cnt); 150 0 : } 151 0 : mon->part_median_sz = median_sz; 152 0 : mon->part_mean_sz = acc_used_cnt ? (acc_used_sz / acc_used_cnt) : 0UL; 153 : 154 0 : mon->sweep_cnt++; 155 0 : mon->acc_free_cnt = 0UL; 156 0 : mon->acc_free_sz = 0UL; 157 0 : mon->acc_free_max_sz = 0UL; 158 0 : mon->acc_used_cnt = 0UL; 159 0 : mon->acc_used_sz = 0UL; 160 0 : fd_memset( mon->acc_used_hist, 0, sizeof(mon->acc_used_hist) ); 161 0 : scan_idx = 0UL; 162 0 : } 163 : 164 0 : mon->scan_idx = scan_idx; 165 0 : return mon; 166 0 : }