Line data Source code
1 : #ifndef HEADER_fd_src_ballet_pb_fd_pb_encode_h
2 : #define HEADER_fd_src_ballet_pb_fd_pb_encode_h
3 :
4 : /* fd_pb_encode.h is a header-only Protobuf wire format encoder. It
5 : is schema agnostic and designed for embedded use. Data is written
6 : out in a streaming manner, with occasional fixups of length prefixes
7 : when writing submessages.
8 :
9 : Using this API requires understanding how Protobuf schemas map to the
10 : wire format: https://protobuf.dev/programming-guides/encoding/
11 :
12 : Here is how fd_pb_encode compares to a typical Protobuf encoding
13 : library:
14 : - No dependencies (no protoc)
15 : - No code generation (user hand-writes encoders)
16 : - No memory allocator needed
17 : - No streaming support (message written into a contiguous buffer)
18 : - User error can result in corrupt messages (duplicate or incorrect
19 : tags, etc)
20 : - Space efficiency is slightly worse (larger length-prefixes for
21 : submessages) */
22 :
23 : #include "fd_pb_wire.h"
24 : #include "../../util/log/fd_log.h"
25 :
/* FD_PB_ENCODER_DEPTH_MAX specifies the max submessage depth.  It is
   also the number of entries in fd_pb_encoder_t's lp_off array, i.e.
   the max number of LEN fields that can be open at the same time. */

#define FD_PB_ENCODER_DEPTH_MAX (63UL)
29 :
/* The pb_encoder class is used to serialize Protobuf messages.  The
   user calls fd_pb_encode_* with each field, top down.

   The encoder does not own the output buffer; it only keeps pointers
   into the buffer handed to fd_pb_encoder_init. */

struct fd_pb_encoder {

  /* [buf0,buf1) is the encode buffer */
  uchar * buf0;
  uchar * buf1;

  /* cur points to where the next field is placed
     (buf0<=cur<=buf1 is asserted by the accessors below) */
  uchar * cur;

  /* Sub-message nesting depth of new fields (0==topmost).
     Incremented by fd_pb_lp_open, decremented by fd_pb_lp_close. */
  uint depth;

  /* offset to length prefix of each depth
     lp_off[0] points to the length prefix of a LEN field at depth 0
     (the LEN field's submessage is at depth 1).
     Offsets are in bytes relative to buf0 and are stored as 32-bit
     values. */
  uint lp_off[ FD_PB_ENCODER_DEPTH_MAX ];

};

typedef struct fd_pb_encoder fd_pb_encoder_t;
53 :
54 : FD_PROTOTYPES_BEGIN
55 :
56 : /* fd_pb_encoder_init creates a Protobuf encoder. Data is written to
57 : the buffer at out (up to out_sz bytes). Returns the initialized
58 : encoder object (has a mutable borrow on out). On return, the
59 : encoder's current message is the topmost message of the encode op.
60 :
61 : out_sz should be slightly overallocated (by 32 bytes) since encoder
62 : bounds checks use overly conservative upper bounds for performance
63 : (i.e. the encoder might fail to add a field with "out of space" if
64 : less than 32 bytes of space is remaining, even if the field would
65 : still fit). */
66 :
67 : static FD_FN_UNUSED fd_pb_encoder_t *
68 : fd_pb_encoder_init( fd_pb_encoder_t * encoder,
69 : uchar * out,
70 0 : ulong out_sz ) {
71 0 : encoder->buf0 = out;
72 0 : encoder->buf1 = out + out_sz;
73 0 : encoder->cur = out;
74 0 : encoder->depth = 0U;
75 0 : return encoder;
76 0 : }
77 :
78 : /* fd_pb_encoder_fini destroys a Protobuf encoder. This is mostly
79 : provided for code cosmetics (does not do any work), since
80 : fd_pb_encoder ensures that the output buffer contains a valid
81 : Protobuf message after every topmost-level write. */
82 :
83 : static FD_FN_UNUSED void *
84 0 : fd_pb_encoder_fini( fd_pb_encoder_t * encoder ) {
85 0 : encoder->buf0 = NULL;
86 0 : encoder->buf1 = NULL;
87 0 : encoder->cur = NULL;
88 0 : encoder->depth = 0;
89 0 : return encoder;
90 0 : }
91 :
92 : /* fd_pb_encoder_out returns a pointer to the first byte of the encoded
93 : output. There is a valid serialized Protobuf message behind this
94 : pointer when the following conditions are true:
95 : - At least one field was fully written at the topmost message
96 : - No write is currently inflight (submessage, length-prefixed, etc) */
97 :
98 : static inline uchar *
99 0 : fd_pb_encoder_out( fd_pb_encoder_t * encoder ) {
100 0 : return encoder->buf0;
101 0 : }
102 :
103 : /* fd_pb_encoder_out_sz returns the number of bytes produced so far. */
104 :
105 : static inline ulong
106 0 : fd_pb_encoder_out_sz( fd_pb_encoder_t * encoder ) {
107 0 : FD_CRIT( encoder->cur >= encoder->buf0, "corrupt encoder state" );
108 0 : return (ulong)encoder->cur - (ulong)encoder->buf0;
109 0 : }
110 :
111 : /* fd_pb_encoder_space returns the number of encoded field bytes that
112 : can be appended to the current submessage. */
113 :
114 : static inline ulong
115 0 : fd_pb_encoder_space( fd_pb_encoder_t const * encoder ) {
116 0 : FD_CRIT( encoder->cur <= encoder->buf1, "corrupt encoder state" );
117 0 : return (ulong)encoder->buf1 - (ulong)encoder->cur;
118 0 : }
119 :
120 : /* fd_pb_lp_open adds a new unknown-sized LEN field to the encoder's
121 : current message. Returns encoder on success. On failure (no space),
122 : returns NULL. Every successful call must be paired with a call to
123 : fd_pb_lp_close. Use fd_pb_encoder_space to check how much free
124 : space the frame has, and fd_pb_encoder_push to append data.
125 :
126 : This method is useful for serializing Protobuf submessages with
127 : another library, moving encoded submessages without a deserialize/
128 : serialize pass, streaming out packed repated fields, iterating over
129 : an unknown-sized sequence, etc. */
130 :
131 : static FD_FN_UNUSED fd_pb_encoder_t *
132 : fd_pb_lp_open( fd_pb_encoder_t * encoder,
133 0 : uint field_id ) {
134 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
135 0 : (fd_pb_varint32_sz_max + fd_pb_varint32_sz_max) ) ) {
136 0 : return NULL;
137 0 : }
138 0 : uint depth = encoder->depth++;
139 0 : if( FD_UNLIKELY( depth >= FD_PB_ENCODER_DEPTH_MAX ) ) {
140 : /* unreachable for well-written clients */
141 0 : FD_LOG_WARNING(( "pb_encode failed: submessage nesting depth exceeded" ));
142 0 : return NULL;
143 0 : }
144 : /* Add tag, reserve worst-case space for length-prefix */
145 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_LEN, field_id );
146 0 : encoder->cur = fd_pb_append_tag( encoder->cur, tag );
147 0 : encoder->lp_off[ depth ] = (uint)( encoder->cur - encoder->buf0 );
148 0 : encoder->cur += fd_pb_varint32_sz_max;
149 0 : return encoder;
150 0 : }
151 :
152 : /* fd_pb_lp_close closes the current LEN field (opened with
153 : fd_pb_lp_open) Returns encoder on success and NULL on failure
154 : (LEN field exceeds max size). */
155 :
156 : static FD_FN_UNUSED fd_pb_encoder_t *
157 0 : fd_pb_lp_close( fd_pb_encoder_t * encoder ) {
158 0 : FD_CRIT( encoder->depth, "unmatched lp_close" );
159 0 : uint depth = --encoder->depth;
160 0 : uchar * lp = encoder->buf0 + encoder->lp_off[ depth ];
161 0 : uchar * sub0 = lp + fd_pb_varint32_sz_max;
162 0 : uchar * sub1 = encoder->cur;
163 0 : ulong sz = (ulong)( sub1-sub0 );
164 0 : FD_CRIT( sub0<=sub1, "corrupt submessage state" );
165 0 : FD_CRIT( sub1<=encoder->buf1, "out-of-bounds write" );
166 0 : if( FD_UNLIKELY( sz>UINT_MAX ) ) {
167 0 : FD_LOG_WARNING(( "pb_encode failed: submessage is too large" ));
168 0 : }
169 0 : fd_pb_append_varint32_sz5( lp, (uint)sz );
170 0 : return encoder;
171 0 : }
172 :
173 : /* fd_pb_submsg_open adds a new submessage to the encoder's current
174 : message, then pivots the current message to the newly created
175 : submessage. Returns encoder on success, NULL on failure (no space
176 : remaining). Every successful call must be paired with a call to
177 : fd_pb_submsg_close. */
178 :
179 : static inline fd_pb_encoder_t *
180 : fd_pb_submsg_open( fd_pb_encoder_t * encoder,
181 0 : uint field_id ) {
182 0 : return fd_pb_lp_open( encoder, field_id );
183 0 : }
184 :
185 : /* fd_pb_submsg_close finishes the encoder's current submessage. Sets
186 : the encoder's current message to the parent message. Returns
187 : encoder on success and NULL on failure (submessage exceeds max msg
188 : size). */
189 :
190 : static inline fd_pb_encoder_t *
191 0 : fd_pb_submsg_close( fd_pb_encoder_t * encoder ) {
192 0 : return fd_pb_lp_close( encoder );
193 0 : }
194 :
195 : /* The below fd_pb_push_<type> methods append a field to the encoder's
196 : current message. type and field_id are the field's parameters as
197 : defined in the schema. value depends on the message type. Note that
198 : the provided type MUST match the schema (e.g. encoding uint32 where
199 : the schema says sint32 would result in memory corruption). Returns
200 : encoder on success, or NULL on failure (out of space).
201 :
202 : fd_pb_push_bool adds a boolean field. value==0 implies false,
203 : otherwise implies true.
204 :
205 : fd_pb_push_{int32,int64} add a signed integer field (optimized for
206 : small unsigned numbers).
207 :
208 : fd_pb_push_{uint32,uint64} add an unsigned integer field (optimized
209 : for small numbers).
210 :
211 : fd_pb_push_{sint32,sint64} add a signed integer field (optimized
212 : for numbers close to zero).
213 :
214 : fd_pb_push_{fixed32,fixed64} add an unsigned integer field
215 : (optimized for very large numbers).
216 :
217 : fd_pb_push_{sfixed32,sfixed64} add a signed integer field
218 : (optimized for very small or large numbers).
219 :
220 : fd_pb_push_{float,double} add a {32,64} bit precision floating-
221 : point field.
222 :
223 : fd_pb_push_bytes adds a byte array field.
224 :
225 : fd_pb_push_string adds a UTF-8 string field.
226 :
227 : fd_pb_push_cstr adds a UTF-8 string field from a NULL-delimited C
228 : string. */
229 :
230 : static inline fd_pb_encoder_t *
231 : fd_pb_push_bool( fd_pb_encoder_t * encoder,
232 : uint field_id,
233 0 : int value ) {
234 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
235 0 : (fd_pb_varint32_sz_max + 1) ) ) {
236 0 : return NULL;
237 0 : }
238 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
239 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
240 0 : encoder->cur = fd_pb_append_bool( encoder->cur, value );
241 0 : return encoder;
242 0 : }
243 :
244 : static inline fd_pb_encoder_t *
245 : fd_pb_push_int32( fd_pb_encoder_t * encoder,
246 : uint field_id,
247 0 : int value ) {
248 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
249 0 : (fd_pb_varint32_sz_max + fd_pb_varint32_sz_max) ) ) {
250 0 : return NULL;
251 0 : }
252 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
253 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
254 0 : encoder->cur = fd_pb_append_int32( encoder->cur, value );
255 0 : return encoder;
256 0 : }
257 :
258 : static inline fd_pb_encoder_t *
259 : fd_pb_push_int64( fd_pb_encoder_t * encoder,
260 : uint field_id,
261 0 : long value ) {
262 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
263 0 : (fd_pb_varint32_sz_max + fd_pb_varint64_sz_max) ) ) {
264 0 : return NULL;
265 0 : }
266 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
267 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
268 0 : encoder->cur = fd_pb_append_int64( encoder->cur, value );
269 0 : return encoder;
270 0 : }
271 :
272 : static inline fd_pb_encoder_t *
273 : fd_pb_push_uint32( fd_pb_encoder_t * encoder,
274 : uint field_id,
275 0 : uint value ) {
276 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
277 0 : (fd_pb_varint32_sz_max + fd_pb_varint32_sz_max) ) ) {
278 0 : return NULL;
279 0 : }
280 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
281 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
282 0 : encoder->cur = fd_pb_append_uint32( encoder->cur, value );
283 0 : return encoder;
284 0 : }
285 :
286 : static inline fd_pb_encoder_t *
287 : fd_pb_push_uint64( fd_pb_encoder_t * encoder,
288 : uint field_id,
289 0 : ulong value ) {
290 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
291 0 : (fd_pb_varint32_sz_max + fd_pb_varint64_sz_max) ) ) {
292 0 : return NULL;
293 0 : }
294 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
295 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
296 0 : encoder->cur = fd_pb_append_uint64( encoder->cur, value );
297 0 : return encoder;
298 0 : }
299 :
300 : static inline fd_pb_encoder_t *
301 : fd_pb_push_sint32( fd_pb_encoder_t * encoder,
302 : uint field_id,
303 0 : int value ) {
304 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
305 0 : (fd_pb_varint32_sz_max + fd_pb_varint32_sz_max) ) ) {
306 0 : return NULL;
307 0 : }
308 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
309 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
310 0 : encoder->cur = fd_pb_append_sint32( encoder->cur, value );
311 0 : return encoder;
312 0 : }
313 :
314 : static inline fd_pb_encoder_t *
315 : fd_pb_push_sint64( fd_pb_encoder_t * encoder,
316 : uint field_id,
317 0 : long value ) {
318 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
319 0 : (fd_pb_varint32_sz_max + fd_pb_varint64_sz_max) ) ) {
320 0 : return NULL;
321 0 : }
322 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_VARINT, field_id );
323 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
324 0 : encoder->cur = fd_pb_append_sint64( encoder->cur, value );
325 0 : return encoder;
326 0 : }
327 :
328 : static inline fd_pb_encoder_t *
329 : fd_pb_push_fixed32( fd_pb_encoder_t * encoder,
330 : uint field_id,
331 0 : uint value ) {
332 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
333 0 : (fd_pb_varint32_sz_max + sizeof(uint)) ) ) {
334 0 : return NULL;
335 0 : }
336 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_I32, field_id );
337 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
338 0 : encoder->cur = fd_pb_append_fixed32( encoder->cur, value );
339 0 : return encoder;
340 0 : }
341 :
342 : static inline fd_pb_encoder_t *
343 : fd_pb_push_fixed64( fd_pb_encoder_t * encoder,
344 : uint field_id,
345 0 : ulong value ) {
346 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
347 0 : (fd_pb_varint32_sz_max + sizeof(ulong)) ) ) {
348 0 : return NULL;
349 0 : }
350 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_I64, field_id );
351 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
352 0 : encoder->cur = fd_pb_append_fixed64( encoder->cur, value );
353 0 : return encoder;
354 0 : }
355 :
356 : static inline fd_pb_encoder_t *
357 : fd_pb_push_float( fd_pb_encoder_t * encoder,
358 : uint field_id,
359 0 : float value ) {
360 0 : union { float f; uint u; } cast;
361 0 : cast.f = value;
362 0 : return fd_pb_push_fixed32( encoder, field_id, cast.u );
363 0 : }
364 :
365 : #if FD_HAS_DOUBLE
366 : static inline fd_pb_encoder_t *
367 : fd_pb_push_double( fd_pb_encoder_t * encoder,
368 : uint field_id,
369 0 : double value ) {
370 0 : union { double d; ulong v; } cast;
371 0 : cast.d = value;
372 0 : return fd_pb_push_fixed64( encoder, field_id, cast.v );
373 0 : }
374 : #endif
375 :
376 : static inline fd_pb_encoder_t *
377 : fd_pb_push_bytes( fd_pb_encoder_t * encoder,
378 : uint field_id,
379 : void const * buf,
380 0 : ulong sz ) {
381 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) <
382 0 : (fd_pb_varint32_sz_max + fd_pb_varint32_sz_max + sz) ||
383 0 : sz>UINT_MAX ) ) { /* max message size, overflow protect */
384 0 : return NULL;
385 0 : }
386 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_LEN, field_id );
387 0 : encoder->cur = fd_pb_append_tag ( encoder->cur, tag );
388 0 : encoder->cur = fd_pb_append_uint32( encoder->cur, (uint)sz );
389 0 : fd_memcpy( encoder->cur, buf, sz );
390 0 : encoder->cur += sz;
391 0 : return encoder;
392 0 : }
393 :
394 : static inline fd_pb_encoder_t *
395 : fd_pb_push_string( fd_pb_encoder_t * encoder,
396 : uint field_id,
397 : char const * str,
398 0 : ulong len ) {
399 0 : return fd_pb_push_bytes( encoder, field_id, str, len );
400 0 : }
401 :
402 : static inline fd_pb_encoder_t *
403 : fd_pb_push_cstr( fd_pb_encoder_t * encoder,
404 : uint field_id,
405 0 : char const * cstr ) {
406 : /* FIXME could do this in a single pass */
407 0 : return fd_pb_push_string( encoder, field_id, cstr, strlen( cstr ) );
408 0 : }
409 :
410 : /* The below fd_pb_push_packed_<type> methods append a packed repeated
411 : field (a sequence of scalars). These are analogous to the above
412 : encoders, except that they encode multiple values. values points to
413 : a contiguous array of values. cnt is the number of values (zero is
414 : fine).
415 :
416 : On the wire, a packed repeated field is a LEN type field with the
417 : content set to the concatenated serializations of each element.
418 :
419 : These are all equivalent to the following pattern:
420 :
421 : fd_pb_lp_open()
422 : for( each element ) fd_pb_append_type()
423 : fd_pb_lp_close()
424 :
425 : fd_pb_push_packed_bool is deliberately omitted. */
426 :
427 : #define FD_PB_PUSH_PACKED( type, ctype ) \
428 : FD_FN_UNUSED static fd_pb_encoder_t * \
429 : fd_pb_push_packed_##type( fd_pb_encoder_t * encoder, \
430 : uint field_id, \
431 : ctype const * values, \
432 0 : ulong cnt ) { \
433 0 : /* would exceed max message size? */ \
434 0 : if( FD_UNLIKELY( cnt>UINT_MAX ) ) return NULL; \
435 0 : if( FD_UNLIKELY( !fd_pb_lp_open( encoder, field_id ) ) ) return NULL; \
436 0 : if( FD_LIKELY( cnt*fd_pb_##type##_sz_max < \
437 0 : fd_pb_encoder_space( encoder ) ) ) { \
438 0 : /* optimize for fast append */ \
439 0 : for( ulong i=0UL; i<cnt; i++ ) { \
440 0 : encoder->cur = fd_pb_append_##type( encoder->cur, values[ i ] ); \
441 0 : } \
442 0 : } else { \
443 0 : /* cold code */ \
444 0 : for( ulong i=0UL; i<cnt; i++ ) { \
445 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) < \
446 0 : fd_pb_##type##_sz_max ) ) return NULL; \
447 0 : encoder->cur = fd_pb_append_##type( encoder->cur, values[ i ] ); \
448 0 : } \
449 0 : } \
450 0 : fd_pb_lp_close( encoder ); \
451 0 : return encoder; \
452 0 : }
453 :
454 0 : FD_PB_PUSH_PACKED( int32, int )
455 : FD_PB_PUSH_PACKED( int64, long )
456 0 : FD_PB_PUSH_PACKED( uint32, uint )
457 : FD_PB_PUSH_PACKED( uint64, ulong )
458 : FD_PB_PUSH_PACKED( sint32, int )
459 : FD_PB_PUSH_PACKED( sint64, long )
460 :
461 : #define FD_PB_PUSH_PACKED_STATIC( type, ctype ) \
462 : FD_FN_UNUSED static fd_pb_encoder_t * \
463 : fd_pb_push_packed_##type( fd_pb_encoder_t * encoder, \
464 : uint field_id, \
465 : ctype const * values, \
466 0 : ulong cnt ) { \
467 0 : /* write header */ \
468 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) < \
469 0 : (fd_pb_varint32_sz_max + fd_pb_varint64_sz_max) ) ) {\
470 0 : return NULL; \
471 0 : } \
472 0 : uint tag = fd_pb_tag( FD_PB_WIRE_TYPE_LEN, field_id ); \
473 0 : ulong data_sz = cnt*fd_pb_##type##_sz_max; \
474 0 : encoder->cur = fd_pb_append_uint32 ( encoder->cur, tag ); \
475 0 : encoder->cur = fd_pb_append_uint64 ( encoder->cur, data_sz ); \
476 0 : /* write data */ \
477 0 : if( FD_UNLIKELY( fd_pb_encoder_space( encoder ) < data_sz ) ) { \
478 0 : return NULL; \
479 0 : } \
480 0 : fd_memcpy( encoder->cur, values, data_sz ); \
481 0 : encoder->cur += data_sz; \
482 0 : return encoder; \
483 0 : }
484 :
485 0 : FD_PB_PUSH_PACKED_STATIC( fixed32, uint )
486 0 : FD_PB_PUSH_PACKED_STATIC( fixed64, ulong )
487 :
488 : #undef FD_PB_PUSH_PACKED
489 :
490 : static inline fd_pb_encoder_t *
491 : fd_pb_push_packed_float( fd_pb_encoder_t * encoder,
492 : uint field_id,
493 : float const * value,
494 0 : ulong cnt ) {
495 0 : return fd_pb_push_packed_fixed32( encoder, field_id, fd_type_pun_const( value ), cnt );
496 0 : }
497 :
498 : #if FD_HAS_DOUBLE
499 : static inline fd_pb_encoder_t *
500 : fd_pb_push_packed_double( fd_pb_encoder_t * encoder,
501 : uint field_id,
502 : double const * value,
503 0 : ulong cnt ) {
504 0 : return fd_pb_push_packed_fixed64( encoder, field_id, fd_type_pun_const( value ), cnt );
505 0 : }
506 : #endif
507 :
508 : FD_PROTOTYPES_END
509 :
510 : #endif /* HEADER_fd_src_ballet_pb_fd_pb_encode_h */
|