LCOV - code coverage report
Current view: top level - discof/repair - fd_repair_metrics.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 151 0.0 %
Date: 2025-12-07 04:58:33 Functions: 0 7 0.0 %

          Line data    Source code
       1             : #include "fd_repair_metrics.h"
       2             : #include <stdio.h>
       3             : #include <stdlib.h>
       4             : 
       5             : #include "../../disco/metrics/fd_metrics.h"
       6             : 
       7             : void *
       8           0 : fd_repair_metrics_new( void * mem ) {
       9           0 :   fd_repair_metrics_t * repair_metrics = (fd_repair_metrics_t *)mem;
      10           0 :   repair_metrics->st = UINT_MAX;
      11           0 :   repair_metrics->en = UINT_MAX;
      12           0 :   repair_metrics->turbine_slot0 = 0;
      13             : 
      14           0 :   return repair_metrics;
      15           0 : }
      16             : 
      17             : fd_repair_metrics_t *
      18           0 : fd_repair_metrics_join( void * repair_metrics ) {
      19           0 :   return (fd_repair_metrics_t *)repair_metrics;
      20           0 : }
      21             : 
      22             : void
      23           0 : fd_repair_metrics_set_turbine_slot0( fd_repair_metrics_t * repair_metrics, ulong turbine_slot0 ) {
      24           0 :   repair_metrics->turbine_slot0 = turbine_slot0;
      25           0 : }
      26             : 
      27             : void
      28             : fd_repair_metrics_add_slot( fd_repair_metrics_t * repair_metrics,
      29             :                      ulong          slot,
      30             :                      long           first_shred_ts,
      31             :                      long           slot_complete_ts,
      32             :                      uint           repair_cnt,
      33           0 :                      uint           turbine_cnt ) {
      34           0 :   uint next_en = (repair_metrics->en + 1) % FD_CATCHUP_METRICS_MAX;
      35           0 :   if( FD_UNLIKELY( next_en == repair_metrics->st || repair_metrics->st == UINT_MAX ) ) {
      36           0 :     repair_metrics->st = (repair_metrics->st + 1) % FD_CATCHUP_METRICS_MAX;
      37           0 :   }
      38           0 :   repair_metrics->slots[ next_en ].slot             = slot;
      39           0 :   repair_metrics->slots[ next_en ].first_shred_ts   = first_shred_ts;
      40           0 :   repair_metrics->slots[ next_en ].slot_complete_ts = slot_complete_ts;
      41           0 :   repair_metrics->slots[ next_en ].repair_cnt       = repair_cnt;
      42           0 :   repair_metrics->slots[ next_en ].turbine_cnt      = turbine_cnt;
      43           0 :   repair_metrics->en = next_en;
      44             : 
      45             : # if DEBUG_LOGGING
      46             :   if( FD_UNLIKELY( slot == repair_metrics->turbine_slot0 ) ) {
      47             :     fd_repair_metrics_print( repair_metrics );
      48             :   }
      49             : # endif
      50           0 : }
      51             : 
      52           0 : #define MAX_WIDTH 120
      53             : static char dashes[MAX_WIDTH + 1] = "========================================================================================================================";
      54             : static char spaces[MAX_WIDTH + 1] = "                                                                                                                        ";
      55             : 
      56             : #define print_slot_interval_bar( slot_metrics, tick_sz, min_ts, verbose )           \
      57           0 :   long duration = slot_metrics->slot_complete_ts - slot_metrics->first_shred_ts;    \
      58           0 :   int  width    = (int)((double)(duration) / tick_sz);                              \
      59           0 :   int  start    = (int)((double)(slot_metrics->first_shred_ts - min_ts) / tick_sz); \
      60           0 :   if( FD_UNLIKELY( verbose ) ) {                                                    \
      61           0 :     printf( "%lu [repaired: %u/%u]%.*s|%.*s| (%.2f ms)",                            \
      62           0 :             slot_metrics->slot,                                                     \
      63           0 :             slot_metrics->repair_cnt, slot_metrics->turbine_cnt + slot_metrics->repair_cnt, \
      64           0 :             start, spaces, width, dashes,                                                   \
      65           0 :             (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)duration) / 1e6 );       \
      66           0 :   } else {                                                                                  \
      67           0 :     printf( "%lu %.*s|%.*s| (%.2f ms)",                                                     \
      68           0 :           slot_metrics->slot,                                                               \
      69           0 :           start, spaces, width, dashes,                                                     \
      70           0 :           (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)duration) / 1e6 ); \
      71           0 :   }                                                                                 \
      72           0 :   if( FD_UNLIKELY( slot_metrics->slot == repair_metrics->turbine_slot0 ) ) {        \
      73           0 :     printf( " <--- (first turbine shred received)" );                               \
      74           0 :   }               \
      75           0 :   printf( "\n" );
      76             : 
      77             : static int
      78           0 : compare_slots( const void * a, const void * b ) {
      79           0 :   const fd_slot_metrics_t * slot_a = (const fd_slot_metrics_t *)a;
      80           0 :   const fd_slot_metrics_t * slot_b = (const fd_slot_metrics_t *)b;
      81             : 
      82           0 :   if( slot_a->slot < slot_b->slot ) return -1;
      83           0 :   if( slot_a->slot > slot_b->slot ) return 1;
      84           0 :   return 0;
      85           0 : }
      86             : 
      87             : /* Should be typically only called once, at the end of a catchup.  Other
      88             :    wise the information is not meaningful (during regular turbine, the
      89             :    slots are pretty much sequentially arriving in order anyway) */
      90             : void
      91           0 : fd_repair_metrics_print_sorted( fd_repair_metrics_t * repair_metrics, int verbose, fd_slot_metrics_t * temp_slots ) {
      92           0 :   if( repair_metrics->st == UINT_MAX ) return; // no data to sort
      93             : 
      94           0 :   uint temp_idx          = 0;
      95           0 :   long min_ts            = repair_metrics->slots[ repair_metrics->st ].first_shred_ts;
      96           0 :   long max_ts            = repair_metrics->slots[ repair_metrics->en ].slot_complete_ts;
      97           0 :   long repair_kickoff_ts = 0;  /* When we receive the first turbine shred, is when we begin orphans. */
      98           0 :   long finish_catchup_ts = 0;  /* the max of all slot < turbine slot0 completion times */
      99           0 :   int  num_catchup_slots = 0;
     100           0 :   uint total_slots       = 0;
     101           0 :   long slot_durations_sum   = 0;
     102           0 :   long incremental_cmpl_sum = 0;
     103           0 :   long prev_slot_ts         = repair_metrics->slots[ repair_metrics->st ].slot_complete_ts;
     104             : 
     105           0 :   for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) {
     106           0 :     fd_slot_metrics_t * slot_data = &repair_metrics->slots[ i ];
     107           0 :     temp_slots[ temp_idx++ ] = *slot_data;
     108           0 :     total_slots++;
     109           0 :     min_ts = fd_min( min_ts, slot_data->first_shred_ts );
     110           0 :     max_ts = fd_max( max_ts, slot_data->slot_complete_ts );
     111           0 :     if( FD_UNLIKELY( slot_data->slot == repair_metrics->turbine_slot0 ) ) repair_kickoff_ts = slot_data->first_shred_ts;
     112           0 :     if( FD_UNLIKELY( slot_data->slot <= repair_metrics->turbine_slot0 ) ) finish_catchup_ts = fd_max( finish_catchup_ts, slot_data->slot_complete_ts );
     113           0 :     if( FD_UNLIKELY( slot_data->slot <= repair_metrics->turbine_slot0 ) ) num_catchup_slots++;
     114           0 :     slot_durations_sum += slot_data->slot_complete_ts - slot_data->first_shred_ts;
     115             : 
     116           0 :     incremental_cmpl_sum += slot_data->slot_complete_ts - prev_slot_ts;
     117           0 :     prev_slot_ts          = slot_data->slot_complete_ts;
     118             : 
     119           0 :     if( i == repair_metrics->en ) break;
     120           0 :   }
     121             : 
     122             :   /* Sort temp array by slot */
     123           0 :   qsort( temp_slots, total_slots, sizeof(fd_slot_metrics_t), compare_slots );
     124             : 
     125             :   /* prints a stacked depth chart of the catchup metrics:
     126             :     slot          |===============| (duration in ms)
     127             :     slot              |================|
     128             :     etc. */
     129             : 
     130           0 :   double tick_sz          = (double)(max_ts - min_ts) / (double)MAX_WIDTH;
     131           0 :   long   orphan_cmpl_ts   = temp_slots[0].first_shred_ts; /* When we make the request for the snapshot slot, this is about when the full tree is connected. */
     132           0 :   int    orphans_cmpl_cnt = 0;                      /* Count of slots completed by the time orphan requests are done */
     133           0 :   for( uint i = 0; i < total_slots; i++ ) {
     134           0 :     fd_slot_metrics_t * slot_metrics = &temp_slots[ i ];
     135           0 :     if( FD_UNLIKELY( slot_metrics->slot_complete_ts < orphan_cmpl_ts ) ) orphans_cmpl_cnt++;
     136           0 :     print_slot_interval_bar( slot_metrics, tick_sz, min_ts, verbose );
     137           0 :   }
     138           0 :   fflush( stdout );
     139             : 
     140             : 
     141           0 :   double pipelined_time     = (double)(max_ts - min_ts);
     142           0 :   double non_pipelined_time = (double)slot_durations_sum;
     143           0 :   FD_LOG_NOTICE(( "\n"
     144           0 :                   "Completed %u slots in %.2f seconds total. \n"
     145           0 :                   "Average slot duration (time from first shred/rq to all shreds received): %.2f ms\n"
     146           0 :                   "Average time between slot completions:                                   %.2f ms\n"
     147           0 :                   "Average slots per second:                                                %.2f\n"
     148           0 :                   "Pipeline factor (sum duration of all slots / total time):                %.2f\n",
     149           0 :                   total_slots,
     150           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)pipelined_time) / 1e9,
     151           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)slot_durations_sum) / (double)total_slots / 1e6,
     152           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)incremental_cmpl_sum) / (double)total_slots / 1e6,
     153           0 :                   (double)total_slots / (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)pipelined_time) * 1e9,
     154           0 :                   non_pipelined_time / pipelined_time ));
     155             : 
     156           0 :   FD_LOG_NOTICE(( "\n"
     157           0 :                   "Caught up %d slots in %.2f ms total. \n"
     158           0 :                   "Time to repair orphans:                    %.2f ms \n"
     159           0 :                   "Total time from connected orphan to done:  %.2f ms \n"
     160           0 :                   "Slots completed by orphans connected:      %d\n",
     161           0 :                   num_catchup_slots,
     162           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)(finish_catchup_ts - repair_kickoff_ts)) / 1e6,
     163           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)(orphan_cmpl_ts    - repair_kickoff_ts)) / 1e6,
     164           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)(finish_catchup_ts - orphan_cmpl_ts   )) / 1e6,
     165           0 :                   orphans_cmpl_cnt ));
     166           0 : }
     167             : 
     168             : void
     169           0 : fd_repair_metrics_print( fd_repair_metrics_t * repair_metrics, int verbose ) {
     170           0 :   long min_ts            = repair_metrics->slots[ repair_metrics->st ].first_shred_ts;
     171           0 :   long max_ts            = repair_metrics->slots[ repair_metrics->en ].slot_complete_ts;
     172           0 :   uint total_slots       = 0;
     173           0 :   long prev_slot_ts         = repair_metrics->slots[ repair_metrics->st ].slot_complete_ts;
     174           0 :   long incremental_cmpl_sum = 0;
     175             : 
     176           0 :   long slot_durations_sum = 0;
     177           0 :   for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) {
     178           0 :     fd_slot_metrics_t * slot_data = &repair_metrics->slots[ i ];
     179           0 :     slot_durations_sum += (slot_data->slot_complete_ts - slot_data->first_shred_ts);
     180           0 :     total_slots++;
     181           0 :     min_ts = fd_min( min_ts, slot_data->first_shred_ts );
     182           0 :     max_ts = fd_max( max_ts, slot_data->slot_complete_ts );
     183             : 
     184             :     /* st -> en are already ordered by completion time. so this-prev
     185             :        is guaranteed to be > 0 */
     186           0 :     incremental_cmpl_sum += slot_data->slot_complete_ts - prev_slot_ts;
     187           0 :     prev_slot_ts          = slot_data->slot_complete_ts;
     188             : 
     189           0 :     if( FD_UNLIKELY( i == repair_metrics->en ) ) break;
     190           0 :   }
     191             : 
     192             :   /* prints a stacked depth chart of the catchup metrics:
     193             :      slot          |===============| (duration in ms)
     194             :      slot              |================|
     195             :      etc. */
     196             : 
     197           0 :   double tick_sz = (double)(max_ts - min_ts) / (double)MAX_WIDTH;
     198           0 :   for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) {
     199           0 :     fd_slot_metrics_t * slot_metrics = &repair_metrics->slots[ i ];
     200           0 :     print_slot_interval_bar( slot_metrics, tick_sz, min_ts, verbose );
     201           0 :     if( i == repair_metrics->en ) break;
     202           0 :   }
     203           0 :   fflush( stdout );
     204             : 
     205           0 :   double pipelined_time     = (double)(max_ts - min_ts);
     206           0 :   double non_pipelined_time = (double)slot_durations_sum;
     207           0 :   FD_LOG_NOTICE(( "\n"
     208           0 :                   "Over past %u completed slots: \n"
     209           0 :                   "Average slot duration (time from first shred/rq to all shreds received): %.2f ms\n"
     210           0 :                   "Average time between slot completions:                                   %.2f ms\n"
     211           0 :                   "Average slots per second:                                                %.2f\n"
     212           0 :                   "Pipeline factor (sum duration of all slots / total time):                %.2f\n",
     213           0 :                   total_slots,
     214           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)slot_durations_sum) / (double)total_slots / 1e6,
     215           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)incremental_cmpl_sum) / (double)total_slots / 1e6,
     216           0 :                   (double)total_slots / (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)pipelined_time) * 1e9,
     217           0 :                   non_pipelined_time / pipelined_time ));
     218           0 : }
     219             : 
     220             : #undef MAX_WIDTH

Generated by: LCOV version 1.14