Line data Source code
1 : #include "fd_repair_metrics.h" 2 : #include <stdio.h> 3 : 4 : #include "../../disco/metrics/fd_metrics.h" 5 : 6 : void * 7 0 : fd_repair_metrics_new( void * mem ) { 8 0 : fd_repair_metrics_t * repair_metrics = (fd_repair_metrics_t *)mem; 9 0 : repair_metrics->st = UINT_MAX; 10 0 : repair_metrics->en = UINT_MAX; 11 0 : repair_metrics->turbine_slot0 = 0; 12 : 13 0 : return repair_metrics; 14 0 : } 15 : 16 : fd_repair_metrics_t * 17 0 : fd_repair_metrics_join( void * repair_metrics ) { 18 0 : return (fd_repair_metrics_t *)repair_metrics; 19 0 : } 20 : 21 : void 22 0 : fd_repair_metrics_set_turbine_slot0( fd_repair_metrics_t * repair_metrics, ulong turbine_slot0 ) { 23 0 : repair_metrics->turbine_slot0 = turbine_slot0; 24 0 : } 25 : 26 : void 27 : fd_repair_metrics_add_slot( fd_repair_metrics_t * repair_metrics, 28 : ulong slot, 29 : long first_ts, 30 : long slot_complete_ts, 31 : uint repair_cnt, 32 0 : uint turbine_cnt ) { 33 0 : uint next_en = (repair_metrics->en + 1) % FD_CATCHUP_METRICS_MAX; 34 0 : if( FD_UNLIKELY( next_en == repair_metrics->st || repair_metrics->st == UINT_MAX ) ) { 35 0 : repair_metrics->st = (repair_metrics->st + 1) % FD_CATCHUP_METRICS_MAX; 36 0 : } 37 0 : repair_metrics->slots[ next_en ].slot = slot; 38 0 : repair_metrics->slots[ next_en ].first_ts = first_ts; 39 0 : repair_metrics->slots[ next_en ].slot_complete_ts = slot_complete_ts; 40 0 : repair_metrics->slots[ next_en ].repair_cnt = repair_cnt; 41 0 : repair_metrics->slots[ next_en ].turbine_cnt = turbine_cnt; 42 0 : repair_metrics->en = next_en; 43 : 44 : # if DEBUG_LOGGING 45 : if( FD_UNLIKELY( slot == repair_metrics->turbine_slot0 ) ) { 46 : fd_repair_metrics_print( repair_metrics ); 47 : } 48 : # endif 49 0 : } 50 : 51 0 : #define MAX_WIDTH 120 52 : static char dashes[MAX_WIDTH + 1] = "========================================================================================================================"; 53 : static char spaces[MAX_WIDTH + 1] = " "; 54 : 55 : static void 56 0 : print_catchup_stats( fd_repair_metrics_t * repair_metrics ) { 57 0 : long min_ts = repair_metrics->slots[ repair_metrics->st ].first_ts; 58 0 : long turbine_ts = 0; 59 0 : long slot_cmpl_time_total = 0; 60 0 : long prev_slot_cmpl_ts = LONG_MAX; 61 0 : long incr_slot_cmpl_total = 0; 62 : 63 0 : uint catchup_cnt = 0; 64 0 : for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) { 65 0 : ulong cur_slot = repair_metrics->slots[ i ].slot; 66 0 : long slot_cmpl_ts = repair_metrics->slots[ i ].slot_complete_ts; 67 0 : long slot_first_ts = repair_metrics->slots[ i ].first_ts; 68 : 69 0 : min_ts = fd_min( min_ts, slot_first_ts ); 70 : 71 0 : if( cur_slot <= repair_metrics->turbine_slot0 ) { 72 0 : slot_cmpl_time_total += (slot_cmpl_ts - slot_first_ts); 73 0 : catchup_cnt++; 74 0 : } 75 0 : if( FD_UNLIKELY( cur_slot == repair_metrics->turbine_slot0 ) ) { 76 0 : turbine_ts = slot_cmpl_ts; 77 0 : } 78 : 79 : /* incremental slot completion time */ 80 0 : if( cur_slot <= repair_metrics->turbine_slot0 && slot_cmpl_ts - prev_slot_cmpl_ts > 0 ) { 81 0 : incr_slot_cmpl_total += (slot_cmpl_ts - prev_slot_cmpl_ts); 82 0 : } 83 0 : prev_slot_cmpl_ts = slot_cmpl_ts; 84 0 : if( FD_UNLIKELY( i == repair_metrics->en ) ) break; 85 0 : } 86 : 87 0 : if( FD_LIKELY( turbine_ts > 0 ) ) { /* still have turbine slot0 in the catchup metrics */ 88 0 : double pipelined_time = (double)(turbine_ts - min_ts); 89 0 : FD_LOG_NOTICE(( "took %.3fs to reach first turbine.", fd_metrics_convert_ticks_to_seconds((ulong)pipelined_time) )); 90 : 91 : /* Compute pipeline factor */ 92 0 : double non_pipelined_time = (double)slot_cmpl_time_total; 93 0 : FD_LOG_NOTICE(( "pipeline factor: %.2f, avg incremental slot completion time: %.2f ms", 94 0 : non_pipelined_time / pipelined_time, 95 0 : (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)incr_slot_cmpl_total) / (double)catchup_cnt / 1e6 )); 96 0 : } 97 0 : } 98 : 99 : void 100 0 : fd_repair_metrics_print( fd_repair_metrics_t * repair_metrics, int verbose ) { 101 0 : long min_ts = repair_metrics->slots[ repair_metrics->st ].first_ts; 102 0 : long max_ts = repair_metrics->slots[ repair_metrics->en ].slot_complete_ts; 103 0 : int turbine0 = 0; 104 0 : uint cnt = 0; 105 : 106 0 : long slot_cmpl_duration_total = 0; 107 0 : for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) { 108 0 : cnt++; 109 0 : ulong cur_slot = repair_metrics->slots[ i ].slot; 110 0 : long slot_cmpl_ts = repair_metrics->slots[ i ].slot_complete_ts; 111 0 : long slot_first_ts = repair_metrics->slots[ i ].first_ts; 112 : 113 0 : min_ts = fd_min( min_ts, slot_first_ts ); 114 0 : max_ts = fd_max( max_ts, slot_cmpl_ts ); 115 0 : slot_cmpl_duration_total += (slot_cmpl_ts - slot_first_ts); 116 : 117 0 : if( FD_UNLIKELY( cur_slot == repair_metrics->turbine_slot0 ) ) turbine0 = 1; 118 0 : if( FD_UNLIKELY( i == repair_metrics->en ) ) break; 119 0 : } 120 : 121 : /* prints a stacked depth chart of the catchup metrics like this: 122 : slot |===============| (duration in ms) 123 : slot |================| 124 : etc. */ 125 : 126 0 : double tick_sz = (double)(max_ts - min_ts) / (double)MAX_WIDTH; 127 : 128 0 : for( uint i = repair_metrics->st;;i = (i + 1) % FD_CATCHUP_METRICS_MAX ) { 129 0 : long duration = repair_metrics->slots[ i ].slot_complete_ts - repair_metrics->slots[ i ].first_ts; 130 0 : int width = (int)((double)(duration) / tick_sz); 131 0 : int start = (int)((double)(repair_metrics->slots[ i ].first_ts - min_ts) / tick_sz); 132 0 : if( FD_UNLIKELY( verbose ) ) { 133 0 : printf( "%lu [repaired: %u/%u]%.*s|%.*s| (%.2f ms)", 134 0 : repair_metrics->slots[ i ].slot, 135 0 : repair_metrics->slots[ i ].repair_cnt, repair_metrics->slots[ i ].turbine_cnt + repair_metrics->slots[ i ].repair_cnt, 136 0 : start, spaces, width, dashes, 137 0 : (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)duration) / 1e6 ); 138 0 : } else { 139 0 : printf( "%lu %.*s|%.*s| (%.2f ms)", 140 0 : repair_metrics->slots[ i ].slot, 141 0 : start, spaces, width, dashes, 142 0 : (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)duration) / 1e6 ); 143 0 : } 144 : 145 0 : if( FD_UNLIKELY( repair_metrics->slots[ i ].slot == repair_metrics->turbine_slot0 ) ) { 146 0 : printf( " <--- (first turbine shred received)" ); 147 0 : } 148 0 : printf( "\n" ); 149 0 : if( i == repair_metrics->en ) break; 150 0 : } 151 0 : fflush( stdout ); 152 : 153 0 : FD_LOG_NOTICE(( "Showing past %u slots, avg slot duration %.2f ms", cnt, (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)slot_cmpl_duration_total) / (double)cnt / 1e6 )); 154 0 : if( FD_UNLIKELY( turbine0 ) ) { /* still have turbine slot0 in the catchup metrics */ 155 0 : print_catchup_stats( repair_metrics ); 156 0 : } 157 0 : } 158 : 159 : #undef MAX_WIDTH 160 :