Line data Source code
1 : #ifndef HEADER_fd_src_cstr_fd_cstr_h
2 : #define HEADER_fd_src_cstr_fd_cstr_h
3 :
4 : /* APIs for manipulating '\0'-terminated character strings ("cstr") */
5 :
6 : #include "../bits/fd_bits.h"
7 :
8 : FD_PROTOTYPES_BEGIN
9 :
10 : /* cstr input *********************************************************/
11 :
12 : /* fd_cstr_to_T converts the cstr pointed at by s into a T and returns
13 : its value. Caller promises s is non-NULL and points at a cstr.
14 :
15 : Note fd_cstr_to_cstr just returns s. As such the lifetime of the
16 : returned pointer is the lifetime of s and the ownership model of the
17 : underlying s is defined by the application.
18 :
19 : fd_cstr_to_char just returns the first character of the cstr (if cstr
20 : is the empty string, this will be the '\0' character ... otherwise,
21 : it will be a normal string character). As char does not have a
22 : consistent interpretation between platforms due to issues with the
23 : language standard itself, the value here should just be treated as a
24 : character and not an integer. Use fd_cstr_to_schar/fd_cstr_to_uchar
25 : if you need to treat a char as an integer.
26 :
27 : fd_cstr_to_cstr and fd_cstr_to_char exist primarily for type system
28 : completeness / to facilitate various generic programming practices.
29 :
30 : The integer converters work in the strtol sense with base 0 (and thus
31 : ignore leading whitespace, handle leading signs and assume octal if
32 : the body is prefixed with 0, hexadecimal if prefixed with 0x and
33 : decimal otherwise). */
34 :
35 : FD_FN_CONST char const * fd_cstr_to_cstr ( char const * s );
36 : FD_FN_PURE char fd_cstr_to_char ( char const * s );
37 : FD_FN_PURE schar fd_cstr_to_schar ( char const * s );
38 : FD_FN_PURE short fd_cstr_to_short ( char const * s );
39 : FD_FN_PURE int fd_cstr_to_int ( char const * s );
40 : FD_FN_PURE long fd_cstr_to_long ( char const * s );
41 : FD_FN_PURE uchar fd_cstr_to_uchar ( char const * s );
42 : FD_FN_PURE ushort fd_cstr_to_ushort( char const * s );
43 : FD_FN_PURE uint fd_cstr_to_uint ( char const * s );
44 : FD_FN_PURE ulong fd_cstr_to_ulong ( char const * s );
45 : FD_FN_PURE float fd_cstr_to_float ( char const * s );
46 : #if FD_HAS_DOUBLE
47 : FD_FN_PURE double fd_cstr_to_double( char const * s );
48 : #endif
49 :
50 : /* fd_cstr_to_ulong_octal is the same as fd_cstr_to_ulong but assumes s
51 : points to an octal cstr. This is mostly used when parsing UNIX style
52 : file permissions. */
53 :
54 : FD_FN_PURE ulong fd_cstr_to_ulong_octal( char const * s );
55 :
56 : /* fd_cstr_to_ulong_seq populates seq (which has room for seq_max items)
57 : with the sequence specified by the given cstr. Sequences are a
58 : comma separated list of ranges (e.g. "R0,R1,R2"). The ranges
59 : themselves can be individual integers (e.g. "5"), a simple range
60 : (e.g. "4-8", includes both endpoints, stop should be at least
61 : start) or a range with a skip (e.g. "1-10/3" or "1-10:3", stop
62 : should be at least start and stride should be positive). Ignores
63 : internal whitespace. Robust against overflow / wrapping of ranges
64 : against ULONG_MAX. Items may appear multiple times and sequences
65 : can have an arbitrary order. Caller promises seq is non-NULL if
66 : seq_max is non-zero. Returns 0 on NULL or malformed cstr or empty
67 : sequence (seq contents might have been arbitrarily clobbered on a
68 : malformed cstr). */
69 :
70 : ulong /* Actual sequence length, if greater than seq_max returned sequence truncated. */
71 : fd_cstr_to_ulong_seq( char const * cstr, /* String to parse, NULL returns 0 */
72 : ulong * seq, /* Indexed [0,seq_max), elements [0,min(actual sequence length,seq_max)) populated with
73 : the leading portion of the seq. Any remaining elements of seq are untouched. */
74 : ulong seq_max ); /* Maximum sequence length */
75 :
76 : /* fd_cstr_hash hashes the cstr pointed to by key to a ulong.
77 : fd_cstr_hash_append updates the hash value (it will be as though the
78 : fd_cstr_hash was called on the string concatenation of the all the
79 : keys provided to hash / hash append in order). Treats key==NULL the
80 : same as the empty string "". Yields identical cross platform results
81 : regardless of how the platform treats the sign of char. Based on one
82 : of the djb2 hash variants (public domain).
83 :
84 : FIXME: This is simple and fast and pretty good practically for string
85 : hashing but more robust and faster algos are probably out there. */
86 :
87 : FD_FN_PURE static inline ulong
88 : fd_cstr_hash_append( ulong hash,
89 3749043 : char const * key ) {
90 3749043 : if( FD_LIKELY( key ) ) {
91 3749043 : uchar const * p = (uchar const *)key;
92 145087742 : for(;;) {
93 145087742 : ulong c = p[0];
94 145087742 : if( FD_UNLIKELY( !c ) ) break;
95 141338698 : hash = (hash*33UL) ^ c;
96 141338698 : p++;
97 141338698 : }
98 3749043 : }
99 3749043 : return hash;
100 3749043 : }
101 :
102 0 : FD_FN_PURE static inline ulong fd_cstr_hash( char const * key ) { return fd_cstr_hash_append( 5381UL, key ); }
103 :
104 : /* fd_cstr_casecmp is equivalent to strcasecmp but doesn't require
105 : FD_HAS_HOSTED (POSIX) support. */
106 :
107 : FD_FN_PURE int
108 : fd_cstr_casecmp( char const * a,
109 : char const * b );
110 :
111 : /* fd_cstr_nlen is equivalent to strnlen but doesn't require
112 : FD_HAS_HOSTED (POSIX) support. */
113 :
114 : FD_FN_PURE ulong
115 : fd_cstr_nlen( char const * s,
116 : ulong m );
117 :
118 : /* cstr output ********************************************************/
119 :
120 : /* fd_cstr_printf printfs a cstr into the sz byte memory region pointed
121 : to by buf. Always returns buf.
122 :
123 : If buf is non-NULL and sz is non-zero, on return, buf will point to a
124 : cstr such that strlen(buf)<sz. That is, bytes [0,strlen(buf)) will
125 : be non-'\0', byte strlen(buf) will be '\0' and bytes (strlen(buf),sz)
126 : will be unchanged. If more than sz bytes are needed to hold the
127 : requested cstr, the cstr will be truncated to its leading bytes such
128 : that strlen(buf)==sz-1. If opt_len is non-NULL, *opt_len will be set
129 : to the strlen(buf) on return.
130 :
131 : buf==NULL and/or sz==0UL are treated as a no-op. (If opt_len is
132 : non-NULL *opt_len will be 0UL on return ... this is debatable though
133 : given the strlen(buf) property above. Might be better to treat this
134 : case as U.B., or abort if opt_len is requested when buf==NULL and/or
135 : sz==0UL or return ULONG_MAX in opt_len (-1) to indicate ill defined
136 : usage or ...) */
137 :
138 : char *
139 : fd_cstr_printf( char * buf,
140 : ulong sz,
141 : ulong * opt_len,
142 : char const * fmt, ... ) __attribute__((format(printf,4,5)));
143 :
144 : /* fd_cstr_printf_check is the same as fd_cstr_printf except that it
145 : returns 1 if the entire cstr, including the NUL terminating
146 : character was written to buf and 0 otherwise.
147 :
148 : If the cstr was truncated, or there was an error in the printf
149 : formatting process, 0 will be returned. Otherwise, on success, 1
150 : will be returned. If zero bytes are written to buf because the
151 : format string is empty, the return value will be 1. */
152 :
153 : int
154 : fd_cstr_printf_check( char * buf,
155 : ulong sz,
156 : ulong * opt_len,
157 : char const * fmt, ... ) __attribute__((format(printf,4,5)));
158 :
/* fd_cstr_init starts writing a cstr into buf.  Returns the location
   where the first character of the cstr should be written (which is
   buf itself). */

static inline char *
fd_cstr_init( char * buf ) {
  char * first = buf;
  return first;
}
163 :
/* fd_cstr_fini finishes writing a cstr by storing the terminating '\0'
   at p.  Assumes p is valid (non-NULL and room for the terminating
   '\0').  Once this returns, the buf passed to fd_cstr_init is a
   properly '\0' terminated cstr. */

static inline void
fd_cstr_fini( char * p ) {
  p[0] = '\0';
}
169 :
/* fd_cstr_append_char appends character c to the cstr being written
   at p and returns the location for the next character (p+1).  Assumes
   p is valid (non-NULL and room for at least this char and a final
   terminating '\0') and c is not '\0'. */

static inline char *
fd_cstr_append_char( char * p,
                     char   c ) {
  p[0] = c;
  return p + 1;
}
175 :
176 : /* fd_cstr_append_text appends n characters of text pointed to by t to
177 : p. Assumes p is valid (non-NULL and room for at least n characters
178 : and a final terminating '\0') and t is valid (points to n consecutive
179 : non-'\0' characters). n is zero is fine. */
180 :
181 : static inline char *
182 : fd_cstr_append_text( char * p,
183 : char const * t,
184 253350 : ulong n ) {
185 253350 : fd_memcpy( p, t, n );
186 253350 : return p + n;
187 253350 : }
188 :
189 : /* fd_cstr_append_cstr appends the cstr pointed to by s to p. Assumes p
190 : is valid (non-NULL and room for at least strlen( s ) characters and a
191 : final terminating '\0'). s==NULL is treated as a no-op. */
192 :
193 : static inline char *
194 : fd_cstr_append_cstr( char * p,
195 48381 : char const * s ) {
196 48381 : if( FD_UNLIKELY( !s ) ) return p;
197 48381 : ulong n = strlen( s );
198 48381 : fd_memcpy( p, s, n );
199 48381 : return p + n;
200 48381 : }
201 :
202 : /* fd_cstr_append_cstr_safe appends up to n chars of the cstr pointed
203 : to by to p. Assumes p is valid (non-NULL and room for at least n
204 : characters and a final terminating '\0'). s==NULL is treated as a
205 : no-op. */
206 :
207 : static inline char *
208 : fd_cstr_append_cstr_safe( char * p,
209 : char const * s,
210 4611 : ulong n ) {
211 4611 : if( FD_UNLIKELY( !s ) ) return p;
212 4611 : ulong l = fd_ulong_min( strlen( s ), n );
213 4611 : fd_memcpy( p, s, l );
214 4611 : return p + l;
215 4611 : }
216 :
217 : /* fd_cstr_append_printf appends the printf of the fmt string into p.
218 : Assumes p is valid (non-NULL and room for printf characters and a
219 : final terminating '\0'). */
220 :
221 : char *
222 : fd_cstr_append_printf( char * p,
223 : char const * fmt, ... ) __attribute__((format(printf,2,3)));
224 :
225 : /* fd_cstr_append_ulong_as_text pretty prints the ulong into p (and
226 : similarly for the other unsigned integer types). Assumes p is valid
227 : (non-NULL and room for at least n characters and a final terminating
228 : '\0'), x is small enough to pretty print to n chars (which implies
229 : that n is at least 1). ws is the character to left pad the converted
230 : value with. pfx is prefix character to use (e.g. '+', '-'), '\0'
231 : indicates no prefix. If a prefix is requested, it will be
232 : immediately before the most significant converted character. */
233 :
234 : static inline char *
235 : fd_cstr_append_uint_as_text( char * p,
236 : char ws,
237 : char pm,
238 : uint x,
239 1517657 : ulong n ) {
240 1517657 : char * p0 = p;
241 1517657 : p += n;
242 1517657 : char * q = p;
243 3289263 : do { uint d = x % 10U; x /= 10U; *(--q) = (char)( d + (uint)'0' ); } while( x );
244 1517657 : if( pm ) *(--q) = pm;
245 1770772 : while( p0<q ) *(p0++) = ws;
246 1517657 : return p;
247 1517657 : }
248 :
249 : static inline char *
250 : fd_cstr_append_ulong_as_text( char * p,
251 : char ws,
252 : char pm,
253 : ulong x,
254 741 : ulong n ) {
255 741 : char * p0 = p;
256 741 : p += n;
257 741 : char * q = p;
258 6813 : do { ulong d = x % 10UL; x /= 10UL; *(--q) = (char)( d + (ulong)'0' ); } while( x );
259 741 : if( pm ) *(--q) = pm;
260 993 : while( p0<q ) *(p0++) = ws;
261 741 : return p;
262 741 : }
263 :
264 : #if FD_HAS_INT128
265 :
266 : static inline char *
267 : fd_cstr_append_uint128_as_text( char * p,
268 : char ws,
269 : char pm,
270 : uint128 x,
271 0 : ulong n ) {
272 0 : char * p0 = p;
273 0 : p += n;
274 0 : char * q = p;
275 0 : do { uint128 d = x % (uint128)10UL; x /= (uint128)10UL; *(--q) = (char)( d + (uint128)'0' ); } while( x );
276 0 : if( pm ) *(--q) = pm;
277 0 : while( p0<q ) *(p0++) = ws;
278 0 : return p;
279 0 : }
280 :
281 : #endif
282 :
283 : static inline char *
284 : fd_cstr_append_uchar_as_text ( char * p,
285 : char ws,
286 : char pm,
287 : uchar x,
288 96 : ulong n ) {
289 96 : return fd_cstr_append_uint_as_text( p, ws, pm, (uint)x, n );
290 96 : }
291 :
292 : static inline char *
293 : fd_cstr_append_ushort_as_text( char * p,
294 : char ws,
295 : char pm,
296 : ushort x,
297 144 : ulong n ) {
298 144 : return fd_cstr_append_uint_as_text( p, ws, pm, (uint)x, n );
299 144 : }
300 :
301 : /* fd_cstr_append_fxp10_as_text same as the above but for the decimal
302 : fixed point value:
303 : x / 10^f
304 : Assumes p is valid (non-NULL and room for at least n characters and a
305 : final terminating '\0'), x / 10^f is not too large to fit within n
306 : characters (which implies that n is at least f+2). ws is the
307 : character to left pad the converted value with. pfx is prefix
308 : character to use (e.g. '+', '-'), '\0' indicates no prefix. If a
309 : prefix is requested, it will be immediately before the most
310 : significant converted character. */
311 :
312 : FD_FN_UNUSED static char * /* Work around -Winline */
313 : fd_cstr_append_fxp10_as_text( char * p,
314 : char ws,
315 : char pm,
316 : ulong f,
317 : ulong x,
318 254880 : ulong n ) {
319 254880 : char * p0 = p;
320 254880 : p += n;
321 254880 : char * q = p;
322 2539728 : while( f ) { ulong d = x % 10UL; x /= 10UL; *(--q) = (char)( d + (ulong)'0' ); f--; }
323 254880 : *(--q) = '.';
324 451469 : do { ulong d = x % 10UL; x /= 10UL; *(--q) = (char)( d + (ulong)'0' ); } while( x );
325 254880 : if( pm ) *(--q) = pm;
326 323971 : while( p0<q ) *(p0++) = ws;
327 254880 : return p;
328 254880 : }
329 :
330 : /* fd_cstr_tokenize tokenizes the cstr of the form whose first
331 : byte is pointed to by cstr:
332 :
333 : [WS][TOKEN 0][DELIM][WS][TOKEN 1][DELIM]...[WS][TOKEN N]{[DELIM][WS][NUL],[NUL]}
334 :
335 : in-place, into:
336 :
337 : [WS][TOKEN 0][NUL][WS][TOKEN 1][NUL]...[WS][TOKEN tok_cnt-1][NUL]
338 :
339 : and returns tok_cnt.
340 :
341 : Further, on return, tok[i] for i in [0,min(tok_cnt,tok_max)) where
342 : tok_cnt is the number of tokens in cstr will point to the first
343 : byte of each token. Due to the tokenization, each one of these will
344 : be properly '\0' terminated.
345 :
346 : Above, [WS] is a sequence of zero or more whitespace characters,
347 : [TOKEN *] are a sequence of zero or more non-delim and non-NUL
348 : characters and delim is assumed to be a non-NUL non-whitespace
349 : character (e.g. ',').
350 :
351 : As such:
352 : - The original cstr is clobbered by this call.
353 : - tok[*] point to a properly terminated cstr into the original cstr
354 : on return. They thus have the same lifetime issues as the original
355 : cstr.
356 : - If tok_cnt > tok_max, tok wasn't large enough to hold all the
357 : tokens found in the cstr. Only the first tok_max are available in
358 : tok[*] (the entire string was still tokenized though).
359 : - Found tokens will not have any leading whitespace.
360 : - Found tokens might have internal or trailing whitespace.
361 : - Zero length tokens are possible. E.g. assuming delim==':', the cstr
362 : "a: b::d: :f" has the tokens: "a", "b", "", "d", "", "f".
363 : - If the final token is zero length, it should use an explicit
364 : delimiter. E.g. assuming delim=='|':
365 : "a|b" has tokens "a", "b"
366 : "a|b|" has tokens "a", "b"
367 : "a|b| " has tokens "a", "b"
368 : "a|b||" has tokens "a", "b", ""
369 : "a|b| |" has tokens "a", "b", ""
370 : "a|b| | " has tokens "a", "b", ""
371 : - This is also true if the final token is the initial token. E.g.
372 : assuming delim==';':
373 : "" has no tokens
374 : " " has no tokens
375 : ";" has the token ""
376 : " ;" has the token ""
377 : " ; " has the token "" */
378 :
379 : ulong
380 : fd_cstr_tokenize( char ** tok,
381 : ulong tok_max,
382 : char * cstr,
383 : char delim );
384 :
385 : /* fd_cstr_append_utf8 appends the UTF-8 encoding of a Unicode code
386 : point into p. Assumes p is valid (non-NULL and room for 1-4 chars
387 : and a final terminating '\0'). */
388 :
389 : static inline char *
390 : fd_cstr_append_utf8( char * p,
391 0 : uint rune ) {
392 0 : if( FD_LIKELY( rune<=0x7f ) ) {
393 0 : *(p++) = (char)rune;
394 0 : } else if( rune<=0x7ff ) {
395 0 : *(p++) = (char)( 0xc0 | (rune>>6) );
396 0 : *(p++) = (char)( 0x80 | ((rune )&0x3f) );
397 0 : } else if( rune<=0xffff ) {
398 0 : *(p++) = (char)( 0xe0 | (rune>>12) );
399 0 : *(p++) = (char)( 0x80 | ((rune>> 6)&0x3f) );
400 0 : *(p++) = (char)( 0x80 | ((rune )&0x3f) );
401 0 : } else if( rune<=0x10ffff ) {
402 0 : *(p++) = (char)( 0xf0 | (rune>>18) );
403 0 : *(p++) = (char)( 0x80 | ((rune>>12)&0x3f) );
404 0 : *(p++) = (char)( 0x80 | ((rune>> 6)&0x3f) );
405 0 : *(p++) = (char)( 0x80 | (rune &0x3f) );
406 0 : } else {
407 : /* replacement char */
408 0 : *(p++) = (char)0xef;
409 0 : *(p++) = (char)0xbf;
410 0 : *(p++) = (char)0xbd;
411 0 : }
412 0 : return p;
413 0 : }
414 :
415 : FD_PROTOTYPES_END
416 :
417 : #endif /* HEADER_fd_src_cstr_fd_cstr_h */
|