Line data Source code
1 : #include "fd_stat.h"
2 :
3 : #define FD_STAT_IMPL( T, UT ) \
4 : ulong \
5 : fd_stat_filter_##T( T * y, \
6 : T const * x, \
7 : ulong n, \
8 36012024 : T thresh ) { /* assumed positive */ \
9 36012024 : ulong j = 0UL; \
10 306779346 : for( ulong i=0UL; i<n; i++ ) { \
11 270767322 : T xi = x[i]; \
12 270767322 : y[j] = xi; /* speculate */ \
13 270767322 : j += (ulong)(fd_##T##_abs( xi )<=(UT)thresh); /* commit */ \
14 270767322 : } \
15 36012024 : return j; \
16 36012024 : } \
17 : \
18 : T \
19 : fd_stat_median_##T( T * x, \
20 24048 : ulong cnt ) { \
21 24048 : if( FD_UNLIKELY( !cnt ) ) return (T)0; \
22 24048 : ulong i1 = cnt >> 1; \
23 23868 : T m1 = fd_sort_up_##T##_select( x, cnt, i1 )[ i1 ]; \
24 23868 : if( (cnt & 1UL) ) return m1; \
25 23868 : ulong i0 = i1-1UL; \
26 11796 : T m0 = fd_sort_up_##T##_select( x, cnt, i0 )[ i0 ]; \
27 11796 : return fd_stat_avg2_##T( m0, m1 ); \
28 23868 : }
29 :
30 : FD_STAT_IMPL( schar, uchar )
31 : FD_STAT_IMPL( short, ushort )
32 : FD_STAT_IMPL( int, uint )
33 : FD_STAT_IMPL( long, ulong )
34 : FD_STAT_IMPL( uchar, uchar )
35 : FD_STAT_IMPL( ushort, ushort )
36 : FD_STAT_IMPL( uint, uint )
37 : FD_STAT_IMPL( ulong, ulong )
38 : #if FD_HAS_INT128
39 : FD_STAT_IMPL( int128, uint128 )
40 : FD_STAT_IMPL( uint128, uint128 )
41 : #endif
42 : FD_STAT_IMPL( float, float )
43 : #if FD_HAS_DOUBLE
44 : FD_STAT_IMPL( double, double )
45 : #endif
46 :
47 : #undef FD_STAT_FILTER_IMPL
48 :
49 : ulong
50 : fd_stat_robust_norm_fit_float( float * opt_mu,
51 : float * opt_sigma,
52 : float const * x,
53 : ulong cnt,
54 3000 : void * scratch ) {
55 3000 : float * y = (float *)scratch;
56 :
57 : /* Filter out weird data points. The threshold is such the sigma
58 : calculation cannot overflow. Specifically consider an x whose
59 : elements are +/-thresh. The median would end being exactly
60 : +/-thresh. The absolute deviations from the median then would then
61 : be either 0,2*thresh. Such that the median absolute deviation
62 : could be 2*thresh. And normalizing such like a normal would be
63 : sigma 2*1.48... thresh <= FLT_MAX. We use a slightly more
64 : conservative FLT_MAX/5 to keep things simple to explain and
65 : consistent with the thresh used by the robust exp fit below. */
66 :
67 3000 : cnt = fd_stat_filter_float( y, x, cnt, FLT_MAX/5.f );
68 3000 : if( FD_LIKELY( opt_mu || opt_sigma ) ) {
69 :
70 : /* Compute the median. This is an unbiased and maximally robust
71 : estimator of the average in the presence of corruption. It is
72 : not the most accurate estimator if the data is clean though. If
73 : cnt is zero post filtering, mu (and sigma) will be zero. */
74 :
75 3000 : float mu = fd_stat_median_float( y, cnt );
76 3000 : if( opt_mu ) *opt_mu = mu;
77 :
78 3000 : if( opt_sigma ) {
79 199551 : for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_float_abs( y[i] - mu );
80 3000 : *opt_sigma = 1.48260221850560f*fd_stat_median_float( y, cnt ); /* 1 / inv_cdf(0.75) */
81 3000 : }
82 3000 : }
83 :
84 3000 : return cnt;
85 3000 : }
86 :
87 : ulong
88 : fd_stat_robust_exp_fit_float( float * opt_x0,
89 : float * opt_tau,
90 : float const * x,
91 : ulong cnt,
92 3000 : void * scratch ) {
93 3000 : float * y = (float *)scratch;
94 :
95 : /* Filter out weird data points. The threshold is such the x0
96 : and tau calculations cannot overflow. Specifically consider an x
97 : whose elements are +/-thresh. The magnitude of the median is at
98 : most thresh and the median absolute deviation is at most 2*thresh.
99 : x0 then is at most (1+2*1.44...)*thresh and tau is at most 2*2.07
100 : thresh. We use a slightly more conservative FLT_MAX/5 for thresh
101 : to keep things simple. */
102 :
103 3000 : cnt = fd_stat_filter_float( y, x, cnt, FLT_MAX/5.f );
104 3000 : if( FD_LIKELY( opt_x0 || opt_tau ) ) {
105 :
106 : /* Compute the median and median absolute deviation from the median. */
107 3000 : float med = fd_stat_median_float( y, cnt );
108 199551 : for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_float_abs( y[i] - med );
109 3000 : float mad = fd_stat_median_float( y, cnt );
110 :
111 : /* Estimate the parameters from the distribution */
112 3000 : if( opt_x0 ) *opt_x0 = med - mad*1.44042009041256f; /* (ln 2) / asinh(1/2) */
113 3000 : if( opt_tau ) *opt_tau = mad*2.07808692123503f; /* 1 / asinh(1/2) */
114 :
115 3000 : }
116 3000 : return cnt;
117 3000 : }
118 :
119 : #if FD_HAS_DOUBLE /* See above for details */
120 :
121 : ulong
122 : fd_stat_robust_norm_fit_double( double * opt_mu,
123 : double * opt_sigma,
124 : double const * x,
125 : ulong cnt,
126 3018 : void * scratch ) {
127 3018 : double * y = (double *)scratch;
128 3018 : cnt = fd_stat_filter_double( y, x, cnt, DBL_MAX/5. );
129 3018 : if( FD_LIKELY( opt_mu || opt_sigma ) ) {
130 3018 : double mu = fd_stat_median_double( y, cnt );
131 3018 : if( opt_mu ) *opt_mu = mu;
132 3018 : if( opt_sigma ) {
133 198471 : for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_double_abs( y[i] - mu );
134 3018 : *opt_sigma = 1.48260221850560*fd_stat_median_double( y, cnt );
135 3018 : }
136 3018 : }
137 :
138 3018 : return cnt;
139 3018 : }
140 :
141 : ulong
142 : fd_stat_robust_exp_fit_double( double * opt_x0,
143 : double * opt_tau,
144 : double const * x,
145 : ulong cnt,
146 3006 : void * scratch ) {
147 3006 : double * y = (double *)scratch;
148 3006 : cnt = fd_stat_filter_double( y, x, cnt, DBL_MAX/5. );
149 3006 : if( FD_LIKELY( opt_x0 || opt_tau ) ) {
150 3006 : double med = fd_stat_median_double( y, cnt );
151 200331 : for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_double_abs( y[i] - med );
152 3006 : double mad = fd_stat_median_double( y, cnt );
153 3006 : if( opt_x0 ) *opt_x0 = med - mad*1.44042009041256;
154 3006 : if( opt_tau ) *opt_tau = mad*2.07808692123503;
155 3006 : }
156 3006 : return cnt;
157 3006 : }
158 :
159 : #endif
160 :
161 : /* ascending sorts */
162 :
163 : #define SORT_NAME fd_sort_up_schar
164 0 : #define SORT_KEY_T schar
165 : #define SORT_IMPL_STYLE 2
166 : #include "../tmpl/fd_sort.c"
167 :
168 : #define SORT_NAME fd_sort_up_short
169 0 : #define SORT_KEY_T short
170 : #define SORT_IMPL_STYLE 2
171 : #include "../tmpl/fd_sort.c"
172 :
173 : #define SORT_NAME fd_sort_up_int
174 0 : #define SORT_KEY_T int
175 : #define SORT_IMPL_STYLE 2
176 : #include "../tmpl/fd_sort.c"
177 :
178 : #define SORT_NAME fd_sort_up_long
179 0 : #define SORT_KEY_T long
180 : #define SORT_IMPL_STYLE 2
181 : #include "../tmpl/fd_sort.c"
182 :
183 : #define SORT_NAME fd_sort_up_uchar
184 0 : #define SORT_KEY_T uchar
185 : #define SORT_IMPL_STYLE 2
186 : #include "../tmpl/fd_sort.c"
187 :
188 : #define SORT_NAME fd_sort_up_ushort
189 0 : #define SORT_KEY_T ushort
190 : #define SORT_IMPL_STYLE 2
191 : #include "../tmpl/fd_sort.c"
192 :
193 : #define SORT_NAME fd_sort_up_uint
194 0 : #define SORT_KEY_T uint
195 : #define SORT_IMPL_STYLE 2
196 : #include "../tmpl/fd_sort.c"
197 :
198 : #define SORT_NAME fd_sort_up_ulong
199 0 : #define SORT_KEY_T ulong
200 : #define SORT_IMPL_STYLE 2
201 : #include "../tmpl/fd_sort.c"
202 :
203 : #if FD_HAS_INT128
204 : #define SORT_NAME fd_sort_up_int128
205 0 : #define SORT_KEY_T int128
206 : #define SORT_IMPL_STYLE 2
207 : #include "../tmpl/fd_sort.c"
208 :
209 : #define SORT_NAME fd_sort_up_uint128
210 0 : #define SORT_KEY_T uint128
211 : #define SORT_IMPL_STYLE 2
212 : #include "../tmpl/fd_sort.c"
213 : #endif
214 :
215 : #define SORT_NAME fd_sort_up_float
216 3387984 : #define SORT_KEY_T float
217 : #define SORT_IMPL_STYLE 2
218 : #include "../tmpl/fd_sort.c"
219 :
220 : #if FD_HAS_DOUBLE
221 : #define SORT_NAME fd_sort_up_double
222 3340893 : #define SORT_KEY_T double
223 : #define SORT_IMPL_STYLE 2
224 : #include "../tmpl/fd_sort.c"
225 : #endif
226 :
227 : /* descending sorts */
228 :
229 : #define SORT_NAME fd_sort_dn_schar
230 0 : #define SORT_KEY_T schar
231 0 : #define SORT_BEFORE(a,b) ((a)>(b))
232 : #define SORT_IMPL_STYLE 2
233 : #include "../tmpl/fd_sort.c"
234 :
235 : #define SORT_NAME fd_sort_dn_short
236 0 : #define SORT_KEY_T short
237 0 : #define SORT_BEFORE(a,b) ((a)>(b))
238 : #define SORT_IMPL_STYLE 2
239 : #include "../tmpl/fd_sort.c"
240 :
241 : #define SORT_NAME fd_sort_dn_int
242 0 : #define SORT_KEY_T int
243 0 : #define SORT_BEFORE(a,b) ((a)>(b))
244 : #define SORT_IMPL_STYLE 2
245 : #include "../tmpl/fd_sort.c"
246 :
247 : #define SORT_NAME fd_sort_dn_long
248 0 : #define SORT_KEY_T long
249 0 : #define SORT_BEFORE(a,b) ((a)>(b))
250 : #define SORT_IMPL_STYLE 2
251 : #include "../tmpl/fd_sort.c"
252 :
253 : #define SORT_NAME fd_sort_dn_uchar
254 0 : #define SORT_KEY_T uchar
255 0 : #define SORT_BEFORE(a,b) ((a)>(b))
256 : #define SORT_IMPL_STYLE 2
257 : #include "../tmpl/fd_sort.c"
258 :
259 : #define SORT_NAME fd_sort_dn_ushort
260 0 : #define SORT_KEY_T ushort
261 0 : #define SORT_BEFORE(a,b) ((a)>(b))
262 : #define SORT_IMPL_STYLE 2
263 : #include "../tmpl/fd_sort.c"
264 :
265 : #define SORT_NAME fd_sort_dn_uint
266 0 : #define SORT_KEY_T uint
267 0 : #define SORT_BEFORE(a,b) ((a)>(b))
268 : #define SORT_IMPL_STYLE 2
269 : #include "../tmpl/fd_sort.c"
270 :
271 : #define SORT_NAME fd_sort_dn_ulong
272 0 : #define SORT_KEY_T ulong
273 0 : #define SORT_BEFORE(a,b) ((a)>(b))
274 : #define SORT_IMPL_STYLE 2
275 : #include "../tmpl/fd_sort.c"
276 :
277 : #if FD_HAS_INT128
278 : #define SORT_NAME fd_sort_dn_int128
279 0 : #define SORT_KEY_T int128
280 0 : #define SORT_BEFORE(a,b) ((a)>(b))
281 : #define SORT_IMPL_STYLE 2
282 : #include "../tmpl/fd_sort.c"
283 :
284 : #define SORT_NAME fd_sort_dn_uint128
285 0 : #define SORT_KEY_T uint128
286 0 : #define SORT_BEFORE(a,b) ((a)>(b))
287 : #define SORT_IMPL_STYLE 2
288 : #include "../tmpl/fd_sort.c"
289 : #endif
290 :
291 : #define SORT_NAME fd_sort_dn_float
292 0 : #define SORT_KEY_T float
293 0 : #define SORT_BEFORE(a,b) ((a)>(b))
294 : #define SORT_IMPL_STYLE 2
295 : #include "../tmpl/fd_sort.c"
296 :
297 : #if FD_HAS_DOUBLE
298 : #define SORT_NAME fd_sort_dn_double
299 0 : #define SORT_KEY_T double
300 0 : #define SORT_BEFORE(a,b) ((a)>(b))
301 : #define SORT_IMPL_STYLE 2
302 : #include "../tmpl/fd_sort.c"
303 : #endif
304 :
|