LCOV - code coverage report
Current view: top level - util/cstr - fd_cstr.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 66 98 67.3 %
Date: 2024-11-13 11:58:15 Functions: 39 9735 0.4 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_cstr_fd_cstr_h
       2             : #define HEADER_fd_src_cstr_fd_cstr_h
       3             : 
       4             : /* APIs for manipulating '\0'-terminated character strings ("cstr") */
       5             : 
       6             : #include "../bits/fd_bits.h"
       7             : 
       8             : FD_PROTOTYPES_BEGIN
       9             : 
      10             : /* cstr input *********************************************************/
      11             : 
      12             : /* fd_cstr_to_T converts the cstr pointed at by s into a T and returns
      13             :    its value.  Caller promises s is non-NULL and points at a cstr.
      14             : 
      15             :    Note fd_cstr_to_cstr just returns s.  As such the lifetime of the
      16             :    returned pointer is the lifetime of s and the ownership model of
      17             :    the underlying s is defined by the application.
      18             : 
      19             :    fd_cstr_to_char just returns the first character of the cstr (if cstr
      20             :    is the empty string, this will be the '\0' character ... otherwise,
      21             :    it will be a normal string character).  As char does not have a
      22             :    consistent interpretation between platforms due to issues with the
      23             :    language standard itself, the value here should just be treated as a
      24             :    character and not an integer.  Use fd_cstr_to_schar/fd_cstr_to_uchar
      25             :    if you need to treat a char as an integer.
      26             : 
      27             :    fd_cstr_to_cstr and fd_cstr_to_char exist primarily for type system
      28             :    completeness / facilitate various generic programming practices.
      29             : 
      30             :    The integer converters work in the strtol sense with base 0 (and thus
      31             :    ignore leading whitespace, handle leading signs and assume octal if
      32             :    the body is prefixed with 0, hexadecimal if prefixed with 0x and
      33             :    decimal otherwise). */
      34             : 
      35             : FD_FN_CONST char const * fd_cstr_to_cstr  ( char const * s );
      36             : FD_FN_PURE  char         fd_cstr_to_char  ( char const * s );
      37             : FD_FN_PURE  schar        fd_cstr_to_schar ( char const * s );
      38             : FD_FN_PURE  short        fd_cstr_to_short ( char const * s );
      39             : FD_FN_PURE  int          fd_cstr_to_int   ( char const * s );
      40             : FD_FN_PURE  long         fd_cstr_to_long  ( char const * s );
      41             : FD_FN_PURE  uchar        fd_cstr_to_uchar ( char const * s );
      42             : FD_FN_PURE  ushort       fd_cstr_to_ushort( char const * s );
      43             : FD_FN_PURE  uint         fd_cstr_to_uint  ( char const * s );
      44             : FD_FN_PURE  ulong        fd_cstr_to_ulong ( char const * s );
      45             : FD_FN_PURE  float        fd_cstr_to_float ( char const * s );
      46             : #if FD_HAS_DOUBLE
      47             : FD_FN_PURE  double       fd_cstr_to_double( char const * s );
      48             : #endif
      49             : 
      50             : /* fd_cstr_to_ulong_octal is the same as fd_cstr_to_ulong but assumes
      51             :    the cstr pointed at by s is octal.  This is mostly used when parsing
      52             :    UNIX style file permissions. */
      53             : 
      54             : FD_FN_PURE ulong fd_cstr_to_ulong_octal( char const * s );
      55             : 
      56             : /* fd_cstr_to_ulong_seq populates seq (which has room for seq_max items)
      57             :    with the sequence specified by the given cstr.  Sequences are a
      58             :    comma separated list of ranges (e.g. "R0,R1,R2").  The ranges
      59             :    themselves can be individual integers (e.g. "5"), a simple range
      60             :    (e.g. "4-8", includes both endpoints, stop should be at least
      61             :    start), or a range with a skip (e.g. "1-10/3" or "1-10:3", stop
      62             :    should be at least start and stride should be positive).  Ignores
      63             :    internal whitespace.  Robust against overflow / wrapping of ranges
      64             :    against ULONG_MAX.  Items may appear multiple times and sequences
      65             :    can have an arbitrary order.  Caller promises seq is non-NULL if
      66             :    seq_max is non-zero.  Returns 0 on NULL or malformed cstr or empty
      67             :    sequence (seq contents might have been arbitrarily clobbered on a
      68             :    malformed cstr). */
      69             : 
      70             : ulong                                         /* Actual sequence length, if greater than seq_max returned sequence truncated. */
      71             : fd_cstr_to_ulong_seq( char const * cstr,      /* String to parse, NULL returns 0 */
      72             :                       ulong *      seq,       /* Indexed [0,max), elements [0,min(actual sequence length,seq_max)) populated with
      73             :                                                  the leading portion of the seq.  Any remaining elements of seq are untouched. */
      74             :                       ulong        seq_max ); /* Maximum sequence length */
      75             : 
      76             : /* fd_cstr_hash hashes the cstr pointed to by key to a ulong.
      77             :    fd_cstr_hash_append updates the hash value (it will be as though
      78             :    fd_cstr_hash was called on the string concatenation of all the
      79             :    keys provided to hash / hash append in order).  Treats key==NULL the
      80             :    same as the empty string "".  Yields identical cross platform results
      81             :    regardless of how the platform treats the sign of char.  Based on one
      82             :    of the djb2 hash variants (public domain).
      83             : 
      84             :    FIXME: This is simple and fast and pretty good practically for string
      85             :    hashing but more robust and faster algos are probably out there. */
      86             : 
      87             : FD_FN_PURE static inline ulong
      88             : fd_cstr_hash_append( ulong        hash,
      89     3749043 :                      char const * key ) {
      90     3749043 :   if( FD_LIKELY( key ) ) {
      91     3749043 :     uchar const * p = (uchar const *)key;
      92   145087742 :     for(;;) {
      93   145087742 :       ulong c = p[0];
      94   145087742 :       if( FD_UNLIKELY( !c ) ) break;
      95   141338698 :       hash = (hash*33UL) ^ c;
      96   141338698 :       p++;
      97   141338698 :     }
      98     3749043 :   }
      99     3749043 :   return hash;
     100     3749043 : }
     101             : 
     102           0 : FD_FN_PURE static inline ulong fd_cstr_hash( char const * key ) { return fd_cstr_hash_append( 5381UL, key ); }
     103             : 
     104             : /* fd_cstr_casecmp is equivalent to strcasecmp but doesn't require
     105             :    FD_HAS_HOSTED (POSIX) support. */
     106             : 
     107             : FD_FN_PURE int
     108             : fd_cstr_casecmp( char const * a,
     109             :                  char const * b );
     110             : 
     111             : /* fd_cstr_nlen is equivalent to strnlen but doesn't require
     112             :    FD_HAS_HOSTED (POSIX) support. */
     113             : 
     114             : FD_FN_PURE ulong
     115             : fd_cstr_nlen( char const * s,
     116             :               ulong        m );
     117             : 
     118             : /* cstr output ********************************************************/
     119             : 
     120             : /* fd_cstr_printf printfs a cstr into the sz byte memory region pointed
     121             :    to by buf.  Always returns buf.
     122             : 
     123             :    If buf is non-NULL and sz is non-zero, on return, buf will point to a
     124             :    cstr such that strlen(buf)<sz.  That is, bytes [0,strlen(buf)) will
     125             :    be non-'\0', byte strlen(buf) will be '\0' and bytes (strlen(buf),sz)
     126             :    will be unchanged.  If more than sz bytes are needed to hold the
     127             :    requested cstr, the cstr will be truncated to its leading bytes such
     128             :    that strlen(buf)==sz-1.  If opt_len is non-NULL, *opt_len will be set
     129             :    to strlen(buf) on return.
     130             : 
     131             :    buf==NULL and/or sz==0UL are treated as a no-op.  (If opt_len is
     132             :    non-NULL *opt_len will be 0UL on return ... this is debatable though
     133             :    given the strlen(buf) property above.  Might be better to treat this
     134             :    case as U.B., or abort if opt_len is requested when buf==NULL or
     135             :    sz==0UL, or return ULONG_MAX in opt_len (-1) to indicate ill defined
     136             :    usage or ...) */
     137             : 
     138             : char *
     139             : fd_cstr_printf( char *       buf,
     140             :                 ulong        sz,
     141             :                 ulong *      opt_len,
     142             :                 char const * fmt, ... ) __attribute__((format(printf,4,5)));
     143             : 
     144             : /* fd_cstr_printf_check is the same as fd_cstr_printf except that it
     145             :    returns 1 if the entire cstr, including the NUL terminating
     146             :    character was written to buf and 0 otherwise.
     147             : 
     148             :    If the cstr was truncated, or there was an error in the printf
     149             :    formatting process, 0 will be returned.  Otherwise, on success, 1
     150             :    will be returned.  If zero bytes are written to buf because the
     151             :    format string is empty, the return value will be 1. */
     152             : 
     153             : int
     154             : fd_cstr_printf_check( char *       buf,
     155             :                       ulong        sz,
     156             :                       ulong *      opt_len,
     157             :                       char const * fmt, ... ) __attribute__((format(printf,4,5)));
     158             : 
     159             : /* fd_cstr_init starts writing a cstr into buf.  Returns where the first
     160             :    character of the cstr should be written (==buf). */
     161             : 
     162       53448 : static inline char * fd_cstr_init( char * buf ) { return buf; }
     163             : 
     164             : /* fd_cstr_fini finishes writing a cstr to buf.  Assumes p is valid
     165             :    (non-NULL and room for the terminating '\0').  At this point, the buf
     166             :    passed to fd_cstr_init will be properly '\0' terminated. */
     167             : 
     168       53490 : static inline void fd_cstr_fini( char * p ) { *p = '\0'; }
     169             : 
     170             : /* fd_cstr_append_char appends character c to cstr.  Assumes p is valid
     171             :    (non-NULL and room for at least this char and a final terminating
     172             :    '\0') and c is not '\0'. */
     173             : 
     174     1821423 : static inline char * fd_cstr_append_char( char * p, char c ) { *(p++) = c; return p; }
     175             : 
     176             : /* fd_cstr_append_text appends n characters of text pointed to by t to
     177             :    p.  Assumes p is valid (non-NULL and room for at least n characters
     178             :    and a final terminating '\0') and t is valid (points to n consecutive
     179             :    non-'\0' characters).  n==0 is fine. */
     180             : 
     181             : static inline char *
     182             : fd_cstr_append_text( char *       p,
     183             :                      char const * t,
     184      253350 :                      ulong        n ) {
     185      253350 :   fd_memcpy( p, t, n );
     186      253350 :   return p + n;
     187      253350 : }
     188             : 
     189             : /* fd_cstr_append_cstr appends the cstr pointed to by s to p.  Assumes p
     190             :    is valid (non-NULL and room for at least strlen( s ) characters and a
     191             :    final terminating '\0').  s==NULL is treated as a no-op. */
     192             : 
     193             : static inline char *
     194             : fd_cstr_append_cstr( char *       p,
     195       48381 :                      char const * s ) {
     196       48381 :   if( FD_UNLIKELY( !s ) ) return p;
     197       48381 :   ulong n = strlen( s );
     198       48381 :   fd_memcpy( p, s, n );
     199       48381 :   return p + n;
     200       48381 : }
     201             : 
     202             : /* fd_cstr_append_cstr_safe appends up to n chars of the cstr pointed
     203             :    to by s to p.  Assumes p is valid (non-NULL and room for at least n
     204             :    characters and a final terminating '\0').  s==NULL is treated as a
     205             :    no-op. */
     206             : 
     207             : static inline char *
     208             : fd_cstr_append_cstr_safe( char *       p,
     209             :                           char const * s,
     210        4611 :                           ulong        n ) {
     211        4611 :   if( FD_UNLIKELY( !s ) ) return p;
     212        4611 :   ulong l = fd_ulong_min( strlen( s ), n );
     213        4611 :   fd_memcpy( p, s, l );
     214        4611 :   return p + l;
     215        4611 : }
     216             : 
     217             : /* fd_cstr_append_printf appends the printf of the fmt string into p.
     218             :    Assumes p is valid (non-NULL and room for printf characters and a
     219             :    final terminating '\0'). */
     220             : 
     221             : char *
     222             : fd_cstr_append_printf( char *       p,
     223             :                        char const * fmt, ... ) __attribute__((format(printf,2,3)));
     224             : 
     225             : /* fd_cstr_append_ulong_as_text pretty prints the ulong into p (and
     226             :    similarly for the other unsigned integer types).  Assumes p is valid
     227             :    (non-NULL and room for at least n characters and a final terminating
     228             :    '\0'), x is small enough to pretty print to n chars (which implies
     229             :    that n is at least 1).  ws is the character to left pad the converted
     230             :    value with.  pm is the prefix character to use (e.g. '+', '-'), '\0'
     231             :    indicates no prefix.  If a prefix is requested, it will be
     232             :    immediately before the most significant converted character. */
     233             : 
     234             : static inline char *
     235             : fd_cstr_append_uint_as_text( char * p,
     236             :                              char   ws,
     237             :                              char   pm,
     238             :                              uint   x,
     239     1517657 :                              ulong  n ) {
     240     1517657 :   char * p0 = p;
     241     1517657 :   p += n;
     242     1517657 :   char * q = p;
     243     3289263 :   do { uint d = x % 10U; x /= 10U; *(--q) = (char)( d + (uint)'0' ); } while( x );
     244     1517657 :   if( pm ) *(--q) = pm;
     245     1770772 :   while( p0<q ) *(p0++) = ws;
     246     1517657 :   return p;
     247     1517657 : }
     248             : 
     249             : static inline char *
     250             : fd_cstr_append_ulong_as_text( char * p,
     251             :                               char   ws,
     252             :                               char   pm,
     253             :                               ulong  x,
     254         741 :                               ulong  n ) {
     255         741 :   char * p0 = p;
     256         741 :   p += n;
     257         741 :   char * q = p;
     258        6813 :   do { ulong d = x % 10UL; x /= 10UL; *(--q) = (char)( d + (ulong)'0' ); } while( x );
     259         741 :   if( pm ) *(--q) = pm;
     260         993 :   while( p0<q ) *(p0++) = ws;
     261         741 :   return p;
     262         741 : }
     263             : 
     264             : #if FD_HAS_INT128
     265             : 
     266             : static inline char *
     267             : fd_cstr_append_uint128_as_text( char *  p,
     268             :                                 char    ws,
     269             :                                 char    pm,
     270             :                                 uint128 x,
     271           0 :                                 ulong   n ) {
     272           0 :   char * p0 = p;
     273           0 :   p += n;
     274           0 :   char * q = p;
     275           0 :   do { uint128 d = x % (uint128)10UL; x /= (uint128)10UL; *(--q) = (char)( d + (uint128)'0' ); } while( x );
     276           0 :   if( pm ) *(--q) = pm;
     277           0 :   while( p0<q ) *(p0++) = ws;
     278           0 :   return p;
     279           0 : }
     280             : 
     281             : #endif
     282             : 
     283             : static inline char *
     284             : fd_cstr_append_uchar_as_text ( char * p,
     285             :                                char   ws,
     286             :                                char   pm,
     287             :                                uchar  x,
     288          96 :                                ulong  n ) {
     289          96 :   return fd_cstr_append_uint_as_text( p, ws, pm, (uint)x, n );
     290          96 : }
     291             : 
     292             : static inline char *
     293             : fd_cstr_append_ushort_as_text( char * p,
     294             :                                char   ws,
     295             :                                char   pm,
     296             :                                ushort x,
     297         144 :                                ulong  n ) {
     298         144 :   return fd_cstr_append_uint_as_text( p, ws, pm, (uint)x, n );
     299         144 : }
     300             : 
     301             : /* fd_cstr_append_fxp10_as_text same as the above but for the decimal
     302             :    fixed point value:
     303             :      x / 10^f
     304             :    Assumes p is valid (non-NULL and room for at least n characters and a
     305             :    final terminating '\0'), x / 10^f is not too large to fit within n
     306             :    characters (which implies that n is at least f+2).  ws is the
     307             :    character to left pad the converted value with.  pm is the prefix
     308             :    character to use (e.g. '+', '-'), '\0' indicates no prefix.  If a
     309             :    prefix is requested, it will be immediately before the most
     310             :    significant converted character. */
     311             : 
     312             : FD_FN_UNUSED static char * /* Work around -Winline */
     313             : fd_cstr_append_fxp10_as_text( char * p,
     314             :                               char   ws,
     315             :                               char   pm,
     316             :                               ulong  f,
     317             :                               ulong  x,
     318      254880 :                               ulong  n ) {
     319      254880 :   char * p0 = p;
     320      254880 :   p += n;
     321      254880 :   char * q = p;
     322     2539728 :   while( f ) { ulong d = x % 10UL; x /= 10UL; *(--q) = (char)( d + (ulong)'0' ); f--; }
     323      254880 :   *(--q) = '.';
     324      451469 :   do { ulong d = x % 10UL; x /= 10UL; *(--q) = (char)( d + (ulong)'0' ); } while( x );
     325      254880 :   if( pm ) *(--q) = pm;
     326      323971 :   while( p0<q ) *(p0++) = ws;
     327      254880 :   return p;
     328      254880 : }
     329             : 
     330             : /* fd_cstr_tokenize tokenizes the cstr of the form whose first
     331             :    byte is pointed to by cstr:
     332             : 
     333             :      [WS][TOKEN 0][DELIM][WS][TOKEN 1][DELIM]...[WS][TOKEN N]{[DELIM][WS][NUL],[NUL]}
     334             : 
     335             :    in-place, into:
     336             : 
     337             :      [WS][TOKEN 0][NUL][WS][TOKEN 1][NUL]...[WS][TOKEN tok_cnt-1][NUL]
     338             : 
     339             :    and returns tok_cnt.
     340             : 
     341             :    Further, on return, tok[i] for i in [0,min(tok_cnt,tok_max)) where
     342             :    tok_cnt is the number of tokens in cstr will point to the first
     343             :    byte of each token.  Due to the tokenization, each one of these will
     344             :    be properly '\0' terminated.
     345             : 
     346             :    Above, [WS] is a sequence of zero or more whitespace characters,
     347             :    [TOKEN *] are a sequence of zero or more non-delim and non-NUL
     348             :    characters and delim is assumed to be a non-NUL non-whitespace
     349             :    character (e.g. ',').
     350             : 
     351             :    As such:
     352             :    - The original cstr is clobbered by this call.
     353             :    - tok[*] point to a properly terminated cstr into the original cstr
     354             :      on return.  They thus have the same lifetime issues as the original
     355             :      cstr.
     356             :    - If tok_cnt > tok_max, tok wasn't large enough to hold all the
     357             :      tokens found in the cstr.  Only the first tok_max are available in
     358             :      tok[*] (the entire string was still tokenized though).
     359             :    - Found tokens will not have any leading whitespace.
     360             :    - Found tokens might have internal or trailing whitespace.
     361             :    - Zero length tokens are possible.  E.g. assuming delim==':', the cstr
     362             :      "a: b::d: :f" has the tokens: "a", "b", "", "d", "", "f".
     363             :    - If the final token is zero length, it should use an explicit
     364             :      delimiter.  E.g. assuming delim=='|':
     365             :        "a|b"     has tokens "a", "b"
     366             :        "a|b|"    has tokens "a", "b"
     367             :        "a|b| "   has tokens "a", "b"
     368             :        "a|b||"   has tokens "a", "b", ""
     369             :        "a|b| |"  has tokens "a", "b", ""
     370             :        "a|b| | " has tokens "a", "b", ""
     371             :    - This is also true if the final token is the initial token.  E.g.
     372             :      assuming delim==';':
     373             :        ""    has no tokens
     374             :        " "   has no tokens
     375             :        ";"   has the token ""
     376             :        " ;"  has the token ""
     377             :        " ; " has the token "" */
     378             : 
     379             : ulong
     380             : fd_cstr_tokenize( char ** tok,
     381             :                   ulong   tok_max,
     382             :                   char *  cstr,
     383             :                   char    delim );
     384             : 
     385             : /* fd_cstr_append_utf8 appends the UTF-8 encoding of a Unicode code
     386             :    point into p.  Assumes p is valid (non-NULL and room for 1-4 chars
     387             :    and a final terminating '\0'). */
     388             : 
     389             : static inline char *
     390             : fd_cstr_append_utf8( char * p,
     391           0 :                      uint   rune ) {
     392           0 :   if( FD_LIKELY( rune<=0x7f ) ) {
     393           0 :     *(p++) = (char)rune;
     394           0 :   } else if( rune<=0x7ff ) {
     395           0 :     *(p++) = (char)( 0xc0 |  (rune>>6)       );
     396           0 :     *(p++) = (char)( 0x80 | ((rune   )&0x3f) );
     397           0 :   } else if( rune<=0xffff ) {
     398           0 :     *(p++) = (char)( 0xe0 |  (rune>>12)       );
     399           0 :     *(p++) = (char)( 0x80 | ((rune>> 6)&0x3f) );
     400           0 :     *(p++) = (char)( 0x80 | ((rune    )&0x3f) );
     401           0 :   } else if( rune<=0x10ffff ) {
     402           0 :     *(p++) = (char)( 0xf0 |  (rune>>18)       );
     403           0 :     *(p++) = (char)( 0x80 | ((rune>>12)&0x3f) );
     404           0 :     *(p++) = (char)( 0x80 | ((rune>> 6)&0x3f) );
     405           0 :     *(p++) = (char)( 0x80 |  (rune     &0x3f) );
     406           0 :   } else {
     407             :     /* replacement char */
     408           0 :     *(p++) = (char)0xef;
     409           0 :     *(p++) = (char)0xbf;
     410           0 :     *(p++) = (char)0xbd;
     411           0 :   }
     412           0 :   return p;
     413           0 : }
     414             : 
     415             : FD_PROTOTYPES_END
     416             : 
     417             : #endif /* HEADER_fd_src_cstr_fd_cstr_h */

Generated by: LCOV version 1.14