LCOV - code coverage report
Current view: top level - discof/repair - fd_repair_metrics.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 152 0.0 %
Date: 2026-05-19 08:05:42 Functions: 0 7 0.0 %

          Line data    Source code
       1             : #include "fd_repair_metrics.h"
       2             : #include <stdio.h>
       3             : #include <stdlib.h>
       4             : 
       5             : #include "../../disco/metrics/fd_metrics.h"
       6             : 
       7             : void *
       8           0 : fd_repair_metrics_new( void * mem ) {
       9           0 :   fd_repair_metrics_t * repair_metrics = (fd_repair_metrics_t *)mem;
      10           0 :   repair_metrics->st = UINT_MAX;
      11           0 :   repair_metrics->en = UINT_MAX;
      12           0 :   repair_metrics->turbine_slot0 = 0;
      13             : 
      14           0 :   return repair_metrics;
      15           0 : }
      16             : 
      17             : fd_repair_metrics_t *
      18           0 : fd_repair_metrics_join( void * repair_metrics ) {
      19           0 :   return (fd_repair_metrics_t *)repair_metrics;
      20           0 : }
      21             : 
      22             : void
      23           0 : fd_repair_metrics_set_turbine_slot0( fd_repair_metrics_t * repair_metrics, ulong turbine_slot0 ) {
      24           0 :   repair_metrics->turbine_slot0 = turbine_slot0;
      25           0 : }
      26             : 
      27             : void
      28             : fd_repair_metrics_add_slot( fd_repair_metrics_t * repair_metrics,
      29             :                             ulong                 slot,
      30             :                             long                  first_shred_ts,
      31             :                             long                  slot_complete_ts,
      32             :                             uint                  repair_cnt,
      33           0 :                             uint                  turbine_cnt ) {
      34           0 :   uint next_en = (repair_metrics->en + 1) % FD_CATCHUP_METRICS_MAX;
      35           0 :   if( FD_UNLIKELY( next_en == repair_metrics->st || repair_metrics->st == UINT_MAX ) ) {
      36           0 :     repair_metrics->st = (repair_metrics->st + 1) % FD_CATCHUP_METRICS_MAX;
      37           0 :   }
      38           0 :   repair_metrics->slots[ next_en ].slot             = slot;
      39           0 :   repair_metrics->slots[ next_en ].first_shred_ts   = first_shred_ts;
      40           0 :   repair_metrics->slots[ next_en ].slot_complete_ts = slot_complete_ts;
      41           0 :   repair_metrics->slots[ next_en ].repair_cnt       = repair_cnt;
      42           0 :   repair_metrics->slots[ next_en ].turbine_cnt      = turbine_cnt;
      43           0 :   repair_metrics->en = next_en;
      44           0 : }
      45             : 
      46           0 : #define MAX_WIDTH 120
      47             : static char dashes[MAX_WIDTH + 1] = "========================================================================================================================";
      48             : static char spaces[MAX_WIDTH + 1] = "                                                                                                                        ";
      49             : 
      50             : #define print_slot_interval_bar( slot_metrics, tick_sz, min_ts, verbose )           \
      51           0 :   long duration = slot_metrics->slot_complete_ts - slot_metrics->first_shred_ts;    \
      52           0 :   int  width    = (int)((double)(duration) / tick_sz);                              \
      53           0 :   int  start    = (int)((double)(slot_metrics->first_shred_ts - min_ts) / tick_sz); \
      54           0 :   if( FD_UNLIKELY( verbose ) ) {                                                    \
      55           0 :     printf( "%lu [repaired: %u/%u]%.*s|%.*s| (%.2f ms)",                            \
      56           0 :             slot_metrics->slot,                                                     \
      57           0 :             slot_metrics->repair_cnt, slot_metrics->turbine_cnt + slot_metrics->repair_cnt, \
      58           0 :             start, spaces, width, dashes,                                                   \
      59           0 :             (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)duration) / 1e6 );       \
      60           0 :   } else {                                                                                  \
      61           0 :     printf( "%lu %.*s|%.*s| (%.2f ms)",                                                     \
      62           0 :           slot_metrics->slot,                                                               \
      63           0 :           start, spaces, width, dashes,                                                     \
      64           0 :           (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)duration) / 1e6 ); \
      65           0 :   }                                                                                 \
      66           0 :   if( FD_UNLIKELY( slot_metrics->slot == repair_metrics->turbine_slot0 ) ) {        \
      67           0 :     printf( " <--- (first turbine shred received)" );                               \
      68           0 :   }               \
      69           0 :   printf( "\n" );
      70             : 
      71             : static int
      72           0 : compare_slots( const void * a, const void * b ) {
      73           0 :   const fd_slot_metrics_t * slot_a = (const fd_slot_metrics_t *)a;
      74           0 :   const fd_slot_metrics_t * slot_b = (const fd_slot_metrics_t *)b;
      75             : 
      76           0 :   if( slot_a->slot < slot_b->slot ) return -1;
      77           0 :   if( slot_a->slot > slot_b->slot ) return 1;
      78           0 :   return 0;
      79           0 : }
      80             : 
      81             : /* Should be typically only called once, at the end of a catchup.  Other
      82             :    wise the information is not meaningful (during regular turbine, the
      83             :    slots are pretty much sequentially arriving in order anyway) */
      84             : void
      85           0 : fd_repair_metrics_print_sorted( fd_repair_metrics_t * repair_metrics, int verbose, fd_slot_metrics_t * temp_slots ) {
      86           0 :   if( repair_metrics->st == UINT_MAX ) return; // no data to sort
      87             : 
      88           0 :   uint temp_idx          = 0;
      89           0 :   long min_ts            = repair_metrics->slots[ repair_metrics->st ].first_shred_ts;
      90           0 :   long max_ts            = repair_metrics->slots[ repair_metrics->en ].slot_complete_ts;
      91           0 :   long repair_kickoff_ts = 0;  /* When we receive the first turbine shred, is when we begin orphans. */
      92           0 :   long finish_catchup_ts = 0;  /* the max of all slot < turbine slot0 completion times */
      93           0 :   int  num_catchup_slots = 0;
      94           0 :   uint total_slots       = 0;
      95           0 :   long slot_durations_sum   = 0;
      96           0 :   long incremental_cmpl_sum = 0;
      97           0 :   long prev_slot_ts         = repair_metrics->slots[ repair_metrics->st ].slot_complete_ts;
      98             : 
      99           0 :   for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) {
     100           0 :     fd_slot_metrics_t * slot_data = &repair_metrics->slots[ i ];
     101           0 :     temp_slots[ temp_idx++ ] = *slot_data;
     102           0 :     total_slots++;
     103           0 :     min_ts = fd_min( min_ts, slot_data->first_shred_ts );
     104           0 :     max_ts = fd_max( max_ts, slot_data->slot_complete_ts );
     105           0 :     if( FD_UNLIKELY( slot_data->slot == repair_metrics->turbine_slot0 ) ) repair_kickoff_ts = slot_data->first_shred_ts;
     106           0 :     if( FD_UNLIKELY( slot_data->slot <= repair_metrics->turbine_slot0 ) ) finish_catchup_ts = fd_max( finish_catchup_ts, slot_data->slot_complete_ts );
     107           0 :     if( FD_UNLIKELY( slot_data->slot <= repair_metrics->turbine_slot0 ) ) num_catchup_slots++;
     108           0 :     slot_durations_sum += slot_data->slot_complete_ts - slot_data->first_shred_ts;
     109             : 
     110           0 :     incremental_cmpl_sum += slot_data->slot_complete_ts - prev_slot_ts;
     111           0 :     prev_slot_ts          = slot_data->slot_complete_ts;
     112             : 
     113           0 :     if( i == repair_metrics->en ) break;
     114           0 :   }
     115             : 
     116             :   /* Sort temp array by slot */
     117           0 :   qsort( temp_slots, total_slots, sizeof(fd_slot_metrics_t), compare_slots );
     118             : 
     119             :   /* prints a stacked depth chart of the catchup metrics:
     120             :     slot          |===============| (duration in ms)
     121             :     slot              |================|
     122             :     etc. */
     123             : 
     124           0 :   double tick_sz          = (double)(max_ts - min_ts) / (double)MAX_WIDTH;
     125           0 :   long   orphan_cmpl_ts   = temp_slots[0].first_shred_ts; /* When we make the request for the snapshot slot, this is about when the full tree is connected. */
     126           0 :   int    orphans_cmpl_cnt = 0;                      /* Count of slots completed by the time orphan requests are done */
     127           0 :   for( uint i = 0; i < total_slots; i++ ) {
     128           0 :     fd_slot_metrics_t * slot_metrics = &temp_slots[ i ];
     129           0 :     if( FD_UNLIKELY( slot_metrics->slot_complete_ts < orphan_cmpl_ts ) ) orphans_cmpl_cnt++;
     130           0 :     print_slot_interval_bar( slot_metrics, tick_sz, min_ts, verbose );
     131           0 :   }
     132           0 :   fflush( stdout );
     133             : 
     134             : 
     135           0 :   double pipelined_time     = (double)(max_ts - min_ts);
     136           0 :   double non_pipelined_time = (double)slot_durations_sum;
     137           0 :   FD_LOG_NOTICE(( "\n"
     138           0 :                   "Completed %u slots in %.2f seconds total. \n"
     139           0 :                   "Average slot duration (time from first shred/rq to all shreds received): %.2f ms\n"
     140           0 :                   "Average time between slot completions:                                   %.2f ms\n"
     141           0 :                   "Average slots per second:                                                %.2f\n"
     142           0 :                   "Pipeline factor (sum duration of all slots / total time):                %.2f\n",
     143           0 :                   total_slots,
     144           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)pipelined_time) / 1e9,
     145           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)slot_durations_sum) / (double)total_slots / 1e6,
     146           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)incremental_cmpl_sum) / (double)total_slots / 1e6,
     147           0 :                   (double)total_slots / (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)pipelined_time) * 1e9,
     148           0 :                   non_pipelined_time / pipelined_time ));
     149             : 
     150           0 :   FD_LOG_NOTICE(( "\n"
     151           0 :                   "Caught up %d slots in %.2f ms total. \n"
     152           0 :                   "Time to repair orphans:                    %.2f ms \n"
     153           0 :                   "Total time from connected orphan to done:  %.2f ms \n"
     154           0 :                   "Slots completed by orphans connected:      %d\n",
     155           0 :                   num_catchup_slots,
     156           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)(finish_catchup_ts - repair_kickoff_ts)) / 1e6,
     157           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)(orphan_cmpl_ts    - repair_kickoff_ts)) / 1e6,
     158           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)(finish_catchup_ts - orphan_cmpl_ts   )) / 1e6,
     159           0 :                   orphans_cmpl_cnt ));
     160           0 : }
     161             : 
     162             : void
     163           0 : fd_repair_metrics_print( fd_repair_metrics_t * repair_metrics, int verbose ) {
     164           0 :   if( repair_metrics->st == UINT_MAX ) return; // no data to print
     165           0 :   long min_ts            = repair_metrics->slots[ repair_metrics->st ].first_shred_ts;
     166           0 :   long max_ts            = repair_metrics->slots[ repair_metrics->en ].slot_complete_ts;
     167           0 :   uint total_slots       = 0;
     168           0 :   long prev_slot_ts         = repair_metrics->slots[ repair_metrics->st ].slot_complete_ts;
     169           0 :   long incremental_cmpl_sum = 0;
     170             : 
     171           0 :   long slot_durations_sum = 0;
     172           0 :   for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) {
     173           0 :     fd_slot_metrics_t * slot_data = &repair_metrics->slots[ i ];
     174           0 :     slot_durations_sum += (slot_data->slot_complete_ts - slot_data->first_shred_ts);
     175           0 :     total_slots++;
     176           0 :     min_ts = fd_min( min_ts, slot_data->first_shred_ts );
     177           0 :     max_ts = fd_max( max_ts, slot_data->slot_complete_ts );
     178             : 
     179             :     /* st -> en are already ordered by completion time. so this-prev
     180             :        is guaranteed to be > 0 */
     181           0 :     incremental_cmpl_sum += slot_data->slot_complete_ts - prev_slot_ts;
     182           0 :     prev_slot_ts          = slot_data->slot_complete_ts;
     183             : 
     184           0 :     if( FD_UNLIKELY( i == repair_metrics->en ) ) break;
     185           0 :   }
     186             : 
     187             :   /* prints a stacked depth chart of the catchup metrics:
     188             :      slot          |===============| (duration in ms)
     189             :      slot              |================|
     190             :      etc. */
     191             : 
     192           0 :   double tick_sz = (double)(max_ts - min_ts) / (double)MAX_WIDTH;
     193           0 :   for( uint i = repair_metrics->st;; i = (i + 1) % FD_CATCHUP_METRICS_MAX ) {
     194           0 :     fd_slot_metrics_t * slot_metrics = &repair_metrics->slots[ i ];
     195           0 :     print_slot_interval_bar( slot_metrics, tick_sz, min_ts, verbose );
     196           0 :     if( i == repair_metrics->en ) break;
     197           0 :   }
     198           0 :   fflush( stdout );
     199             : 
     200           0 :   double pipelined_time     = (double)(max_ts - min_ts);
     201           0 :   double non_pipelined_time = (double)slot_durations_sum;
     202           0 :   FD_LOG_NOTICE(( "\n"
     203           0 :                   "Over past %u completed slots: \n"
     204           0 :                   "Average slot duration (time from first shred/rq to all shreds received): %.2f ms\n"
     205           0 :                   "Average time between slot completions:                                   %.2f ms\n"
     206           0 :                   "Average slots per second:                                                %.2f\n"
     207           0 :                   "Pipeline factor (sum duration of all slots / total time):                %.2f\n",
     208           0 :                   total_slots,
     209           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)slot_durations_sum) / (double)total_slots / 1e6,
     210           0 :                   (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)incremental_cmpl_sum) / (double)total_slots / 1e6,
     211           0 :                   (double)total_slots / (double)fd_metrics_convert_ticks_to_nanoseconds((ulong)pipelined_time) * 1e9,
     212           0 :                   non_pipelined_time / pipelined_time ));
     213           0 : }
     214             : 
     215             : #undef MAX_WIDTH

Generated by: LCOV version 1.14