Line data Source code
1 : #ifndef HEADER_fd_src_util_math_fd_stat_h
2 : #define HEADER_fd_src_util_math_fd_stat_h
3 :
4 : #include "../bits/fd_bits.h"
5 :
6 : FD_PROTOTYPES_BEGIN
7 :
8 : /* fd_stat_avg2_T computes the average of two numbers of type T without
9 : risk of intermediate overflow. For integer types, the value is
10 : computed in a round toward negative infinity sense. For floating
11 : point types, assuming the CPU good floating point handling and the
12 : floating point rounding mode has not been mucked with, the average is
13 : computed in a round to nearest even sense. */
14 :
15 12 : static inline schar fd_stat_avg2_schar ( schar x, schar y ) { return (schar )(((long )x + (long )y) >> 1); }
16 12 : static inline short fd_stat_avg2_short ( short x, short y ) { return (short )(((long )x + (long )y) >> 1); }
17 12 : static inline int fd_stat_avg2_int ( int x, int y ) { return (int )((long )x + (long )y) >> 1; }
18 12 : static inline uchar fd_stat_avg2_uchar ( uchar x, uchar y ) { return (uchar )(((ulong)x + (ulong)y) >> 1); }
19 12 : static inline ushort fd_stat_avg2_ushort ( ushort x, ushort y ) { return (ushort)(((ulong)x + (ulong)y) >> 1); }
20 12 : static inline uint fd_stat_avg2_uint ( uint x, uint y ) { return (uint )((ulong)x + (ulong)y) >> 1; }
21 12 : static inline long fd_stat_avg2_long ( long x, long y ) { return (x>>1) + (y>>1) + (x & y & 1L ); }
22 12 : static inline ulong fd_stat_avg2_ulong ( ulong x, ulong y ) { return (x>>1) + (y>>1) + (x & y & 1UL); }
23 : #if FD_HAS_INT128
24 0 : static inline int128 fd_stat_avg2_int128 ( int128 x, int128 y ) { return (x>>1) + (y>>1) + (x & y & (int128) 1); }
25 0 : static inline uint128 fd_stat_avg2_uint128( uint128 x, uint128 y ) { return (x>>1) + (y>>1) + (x & y & (uint128)1); }
26 : #endif
27 6048 : static inline float fd_stat_avg2_float ( float x, float y ) { return 0.5f*x + 0.5f*y; }
28 : #if FD_HAS_DOUBLE
29 5748 : static inline double fd_stat_avg2_double ( double x, double y ) { return 0.5*x + 0.5*y; }
30 : #endif
31 :
32 : /* fd_stat_filter_float computes y = x( |x| <= thresh ) where x is an
33 : array of cnt floats. thresh is assumed to be non-negative and finite
34 : (UB if not). |x| will be computed by fd_float_abs. Returns the
35 : number of elements found (will be in [0,cnt]). NaN elements of x
36 : will be filtered out. In place operation is fine. If running out of
37 : place, x and y should not overlap. Uses stream compaction for
38 : branchless implementation (high and predictable performance
39 : regardless of the density or distribution of values to filter); as
40 : such y must have room for up to n elements.
41 :
42 : fd_stat_median_float computes the median of the elements of x.
43 : Undefined behavior any x is NaN. Returns 0 if cnt is zero. The
44 : current implementation is O(cnt) (and not the fastest algorithmically
45 : possible when cnt is even ... should use a quick multiselect instead
46 : of two quick selects).
47 :
48 : Similarly for the other types. For even cnt and integral types, the
49 : returned median will be rounded toward negative infinity. */
50 :
51 : #define FD_STAT_DECL( T ) \
52 : ulong \
53 : fd_stat_filter_##T( T * y, \
54 : T const * x, \
55 : ulong cnt, \
56 : T thresh ); \
57 : \
58 : T \
59 : fd_stat_median_##T( T * x, \
60 : ulong cnt )
61 :
62 : FD_STAT_DECL( schar );
63 : FD_STAT_DECL( short );
64 : FD_STAT_DECL( int );
65 : FD_STAT_DECL( long );
66 : FD_STAT_DECL( uchar );
67 : FD_STAT_DECL( ushort );
68 : FD_STAT_DECL( uint );
69 : FD_STAT_DECL( ulong );
70 : #if FD_HAS_INT128
71 : FD_STAT_DECL( int128 );
72 : FD_STAT_DECL( uint128 );
73 : #endif
74 : FD_STAT_DECL( float );
75 : #if FD_HAS_DOUBLE
76 : FD_STAT_DECL( double );
77 : #endif
78 :
79 : #undef FD_STAT_DECL
80 :
81 : /* fd_stat_robust_norm_fit_float estimates the mean and rms of the
82 : cnt samples of x, indexed [0,cnt), assuming that most x are IID
83 : samples drawn from a normal distribution and the remainder are
84 : potentially arbitrarily corrupted. The method used here tries to
85 : optimize for robustness against data corruption as opposed to the
86 : more typical maximum likelihood (which is more accurate for clean
87 : data but much less robust to outliers corrupting the estimates).
88 :
89 : Only samples with magnitude less than FLT_MAX/5 will be included in
90 : this estimate (i.e. NaN, inf and/or inordinately large sample values
91 : will be ignored). Returns the number of data points used to form
92 : the estimate. On return, if opt_mu is non-NULL, *opt_mu will have
93 : the mean estimate (this is guaranteed to be a finite representation)
94 : and, if opt_sigma is non-NULL, *opt_sigma will have the rms estimate
95 : (this is guaranteed to be a finite representation). For pathological
96 : cases, if there were no valid samples, mu=sigma=0 and, if the
97 : majority of the samples are equal, mu=mode,sigma=0.
98 :
99 : scratch points to a float aligned region with space for up to cnt
100 : float. It will be clobbered on return. It is fine to pass x as
101 : scratch if destructive operation is fine. */
102 :
103 : ulong
104 : fd_stat_robust_norm_fit_float( float * opt_mu,
105 : float * opt_sigma,
106 : float const * x,
107 : ulong cnt,
108 : void * scratch );
109 :
110 : /* fd_stat_robust_exp_fit_float is same as the above with a shifted
111 : exponential distribution. x0 is the estimated minimum of this
112 : distribution and tau is the estimated decay length. For pathological
113 : cases, if there were no valid samples, x0=tau=0 and, if the majority
114 : of the samples are equal, x0=mode,tau=0. */
115 :
116 : ulong
117 : fd_stat_robust_exp_fit_float( float * opt_x0,
118 : float * opt_tau,
119 : float const * x,
120 : ulong cnt,
121 : void * scratch );
122 :
123 : /* These are double precision versions of the above. (The ignored
124 : threshold is accordingly increased to DBL_MAX/5.) */
125 :
126 : #if FD_HAS_DOUBLE
127 : ulong
128 : fd_stat_robust_norm_fit_double( double * opt_mu,
129 : double * opt_sigma,
130 : double const * x,
131 : ulong cnt,
132 : void * scratch );
133 :
134 : ulong
135 : fd_stat_robust_exp_fit_double( double * opt_x0,
136 : double * opt_tau,
137 : double const * x,
138 : ulong cnt,
139 : void * scratch );
140 : #endif
141 :
142 : /* Declare ascending and descending sorts for all the primitive types.
143 : For floating point sorts, the resulting order if NaNs are present is
144 : undefined (as they are not well ordered with anything). If there is
145 : a mixture of signed zeros and unsigned zeros, their exact order in
146 : the region of zeros will be undefined (as they are not well ordered
147 : amongst themselves). In short, don't pass arrays with NaN and ignore
148 : the sign of zero when using floating point sorts. */
149 :
150 : /* ascending */
151 :
152 : #define SORT_NAME fd_sort_up_schar
153 : #define SORT_KEY_T schar
154 : #define SORT_IMPL_STYLE 1
155 : #include "../tmpl/fd_sort.c"
156 :
157 : #define SORT_NAME fd_sort_up_short
158 : #define SORT_KEY_T short
159 : #define SORT_IMPL_STYLE 1
160 : #include "../tmpl/fd_sort.c"
161 :
162 : #define SORT_NAME fd_sort_up_int
163 : #define SORT_KEY_T int
164 : #define SORT_IMPL_STYLE 1
165 : #include "../tmpl/fd_sort.c"
166 :
167 : #define SORT_NAME fd_sort_up_long
168 : #define SORT_KEY_T long
169 : #define SORT_IMPL_STYLE 1
170 : #include "../tmpl/fd_sort.c"
171 :
172 : #define SORT_NAME fd_sort_up_uchar
173 : #define SORT_KEY_T uchar
174 : #define SORT_IMPL_STYLE 1
175 : #include "../tmpl/fd_sort.c"
176 :
177 : #define SORT_NAME fd_sort_up_ushort
178 : #define SORT_KEY_T ushort
179 : #define SORT_IMPL_STYLE 1
180 : #include "../tmpl/fd_sort.c"
181 :
182 : #define SORT_NAME fd_sort_up_uint
183 : #define SORT_KEY_T uint
184 : #define SORT_IMPL_STYLE 1
185 : #include "../tmpl/fd_sort.c"
186 :
187 : #define SORT_NAME fd_sort_up_ulong
188 : #define SORT_KEY_T ulong
189 : #define SORT_IMPL_STYLE 1
190 : #include "../tmpl/fd_sort.c"
191 :
192 : #if FD_HAS_INT128
193 : #define SORT_NAME fd_sort_up_int128
194 : #define SORT_KEY_T int128
195 : #define SORT_IMPL_STYLE 1
196 : #include "../tmpl/fd_sort.c"
197 :
198 : #define SORT_NAME fd_sort_up_uint128
199 : #define SORT_KEY_T uint128
200 : #define SORT_IMPL_STYLE 1
201 : #include "../tmpl/fd_sort.c"
202 : #endif
203 :
204 : #define SORT_NAME fd_sort_up_float
205 : #define SORT_KEY_T float
206 : #define SORT_IMPL_STYLE 1
207 : #include "../tmpl/fd_sort.c"
208 :
209 : #if FD_HAS_DOUBLE
210 : #define SORT_NAME fd_sort_up_double
211 : #define SORT_KEY_T double
212 : #define SORT_IMPL_STYLE 1
213 : #include "../tmpl/fd_sort.c"
214 : #endif
215 :
216 : /* descending */
217 :
218 : #define SORT_NAME fd_sort_dn_schar
219 : #define SORT_KEY_T schar
220 : #define SORT_IMPL_STYLE 1
221 : #include "../tmpl/fd_sort.c"
222 :
223 : #define SORT_NAME fd_sort_dn_short
224 : #define SORT_KEY_T short
225 : #define SORT_IMPL_STYLE 1
226 : #include "../tmpl/fd_sort.c"
227 :
228 : #define SORT_NAME fd_sort_dn_int
229 : #define SORT_KEY_T int
230 : #define SORT_IMPL_STYLE 1
231 : #include "../tmpl/fd_sort.c"
232 :
233 : #define SORT_NAME fd_sort_dn_long
234 : #define SORT_KEY_T long
235 : #define SORT_IMPL_STYLE 1
236 : #include "../tmpl/fd_sort.c"
237 :
238 : #define SORT_NAME fd_sort_dn_uchar
239 : #define SORT_KEY_T uchar
240 : #define SORT_IMPL_STYLE 1
241 : #include "../tmpl/fd_sort.c"
242 :
243 : #define SORT_NAME fd_sort_dn_ushort
244 : #define SORT_KEY_T ushort
245 : #define SORT_IMPL_STYLE 1
246 : #include "../tmpl/fd_sort.c"
247 :
248 : #define SORT_NAME fd_sort_dn_uint
249 : #define SORT_KEY_T uint
250 : #define SORT_IMPL_STYLE 1
251 : #include "../tmpl/fd_sort.c"
252 :
253 : #define SORT_NAME fd_sort_dn_ulong
254 : #define SORT_KEY_T ulong
255 : #define SORT_IMPL_STYLE 1
256 : #include "../tmpl/fd_sort.c"
257 :
258 : #if FD_HAS_INT128
259 : #define SORT_NAME fd_sort_dn_int128
260 : #define SORT_KEY_T int128
261 : #define SORT_IMPL_STYLE 1
262 : #include "../tmpl/fd_sort.c"
263 :
264 : #define SORT_NAME fd_sort_dn_uint128
265 : #define SORT_KEY_T uint128
266 : #define SORT_IMPL_STYLE 1
267 : #include "../tmpl/fd_sort.c"
268 : #endif
269 :
270 : #define SORT_NAME fd_sort_dn_float
271 : #define SORT_KEY_T float
272 : #define SORT_IMPL_STYLE 1
273 : #include "../tmpl/fd_sort.c"
274 :
275 : #if FD_HAS_DOUBLE
276 : #define SORT_NAME fd_sort_dn_double
277 : #define SORT_KEY_T double
278 : #define SORT_IMPL_STYLE 1
279 : #include "../tmpl/fd_sort.c"
280 : #endif
281 :
282 : FD_PROTOTYPES_END
283 :
284 : #endif /* HEADER_fd_src_util_math_fd_stat_h */
285 :
|