Line data Source code
1 : #ifndef HEADER_fd_src_util_net_fd_eth_h
2 : #define HEADER_fd_src_util_net_fd_eth_h
3 :
4 : #include "../bits/fd_bits.h"
5 :
6 : /* Host side ethernet protocol crash course:
7 :
8 : In terms of logical bytes on the wire, a non-Jumbo normal ethernet
9 : packet looks like:
10 :
11 :          |     | <- 4B*TAG_CNT --> | <------ at most 1500B -------> |     |
12 :          |     |                   |                                |     |
13 :          | eth | vlan |     | vlan |              eth               |     |
14 : preamble | hdr | tag  |     | tag  |            payload             | fcs | ifg
15 :   ~8B    | 14B | 4B   | ... | 4B   | [max(0B,46B-4B*TAG_CNT),1500B] | 4B  | ~12B
16 :          |                                                          |     |
17 :          |              what hardware typically shows               |     |
18 :          | <---------- at most 14B + 4B*TAG_CNT + 1500B ----------> |     |
19 :          |                                                                |
20 :          | <----------------------- at least 64B -----------------------> |
21 :
22 : The preamble (an oscillatory bit pattern), FCS (frame check sequence
23 : / CRC / cyclic redundancy check) and IFG (interframe gap / quiet
24 : time) are usually not shown to threads receiving a packet from an
25 : Ethernet LAN (these are historically for helping synchronize a
26 : receiver with a sender on receipt of a packet from the sender and
27 : validating a packet was received correctly with reasonably high
28 : probability). Packets with a bad FCS may or may not be shown to an
29 : application depending on the hardware, interface and how it was
30 : configured.
31 :
32 : Threads sending packets usually do not worry about the preamble, FCS
33 : or IFG. These are typically stripped by the underlying hardware on
34 : receive and inserted in the appropriate locations on send. Just as
35 : often, hardware given a "runt" payload to send (smaller than the
36 : minimum size above) will zero pad the payload to the minimal payload
37 : size (and the fcs will cover this zero padding too).
38 :
39 : VLAN tags have an unfortunately far wider range of behaviors in the
40 : wild due to the rather messy set of protocols that have accumulated
41 : over the decades.
42 :
43 : TAG_CNT can only be determined by parsing the packet headers. 0 (raw
44 : Ethernet) or 1 (VLAN tagged ethernet) are common but there isn't an
45 : obvious theoretical upper limit to TAG_CNT (nobody seems to have
46 : seriously thought about it). For example, queue-in-queue network
47 : configs and/or various capture devices might insert additional tags
48 : to further decorate a packet. Thus, 2 vlan tags isn't unheard of
49 : (e.g. queue-in-queue or a capture device adding a tag to vlan tagged
50 : ethernet indicating timestamp info has been provided for the packet
51 : somehow) or even 3 (e.g. capture device tagging a 2 vlan tag packet).
52 :
53 : Similarly, hardware might insert or strip VLAN tags behind a thread's
54 : back depending on the network, hardware, interface and how it was
55 : configured. And different hardware devices and hardware-software
56 : interfaces have ideas as to what applications should be exposed to.
57 : And depending where a packet is inspected, it might have different
58 : number of tags.
59 :
60 : As a practical matter, most applications have some set of VLAN tag
61 : behaviors they understand / expect for the combination of LAN, WAN,
62 : NIC and interface they support. Often this is implicit / evolved as
63 : most application devs are blissfully unaware of all this. E.g. the
64 : maximum number of VLAN tags they can handle is implicitly bounded by
65 : their buffer sizes / buffer management, their range of expected
66 : behaviors is bounded by what worked in testing on their combination
67 : of lab hardware and equipment, etc.
68 :
69 : MAC addresses have 6 bytes. Bit 0 in byte 0 indicates whether the
70 : MAC address is multicast or unicast. Bit 1 in byte 0 indicates
71 : whether or not the MAC address is locally administered. For standard
72 : unicast MAC addresses (not locally admin'd), the first 3 bytes encode
73 : an OUI (organizationally unique identifier). The last 3 bytes are
74 : then assigned by the organization to hardware such that Ethernet
75 : conformant hardware will all have globally unique MAC addresses. Most
76 : anything goes for locally admin'd addresses. Additional notes are
77 : below.
78 :
79 : It is notable that:
80 :
81 : - The FCS does not protect against header or VLAN tag corruption.
82 : Applications, even in non-malicious scenarios, can not assume the
83 : headers are valid. In most non-malicious scenarios though,
84 : applications can assume that corrupted headers are reasonably
85 : rare and thus need not be optimized. In some non-malicious
86 : scenarios though, header corruption is common enough to warrant
87 : optimized handling.
88 :
89 : - Routing and flow steering mechanisms for Ethernet tend to not be
90 : precise. That is, applications should not assume they will only
91 : receive packets they care about. Like the above, in non-malicious
92 : scenarios, applications usually can assume they will mostly receive
93 : packets they care about and that the record of packets they care
94 : about is reasonably complete (such that they don't need to worry
95 : about optimizing irrelevant packet filtering or optimizing for drop
96 : recovery). There are notable non-malicious exceptions though.
97 :
98 : - The FCS does not provide sufficiently strong protection against
99 : invalid packet receipt in many modern real world scenarios, even
100 : non-malicious. Various combinations of high bandwidth links, large
101 : numbers of senders, large numbers of receivers and high BER links
102 : create situations where corrupted payloads pass the FCS check and
103 : thus get exposed to the application. Applications need to be able
104 : to detect and recover from this to their satisfaction. In non-malicious
105 : scenarios, this rate tends to be low enough relative to the overall
106 : application packet rates so as to not require optimized handling
107 : (e.g. fast detect, treat as drop and use standard drop recovery
108 : mechanisms). */
109 :
/* FD_ETH_HDR_TYPE_* give, in host byte order, the type values that say
   what follows an ethernet hdr / vlan tag. */

#define FD_ETH_HDR_TYPE_IP   ((ushort)0x0800) /* An IP packet follows this hdr/tag */
#define FD_ETH_HDR_TYPE_ARP  ((ushort)0x0806) /* An ARP packet follows this hdr/tag */
#define FD_ETH_HDR_TYPE_VLAN ((ushort)0x8100) /* A VLAN tag follows this hdr/tag */

/* FD_ETH_FCS_APPEND_SEED is the fcs value to start an incremental fcs
   calculation from. */

#define FD_ETH_FCS_APPEND_SEED (0U)
115 :
/* Ethernet payload size limits, in bytes, as ulongs.

   FD_ETH_PAYLOAD_MAX and FD_ETH_PAYLOAD_MIN_RAW are the largest and
   smallest payload sizes of a normal untagged ethernet packet.

   FD_ETH_PAYLOAD_MIN(tag_cnt) is the smallest payload size of an
   ethernet packet carrying tag_cnt vlan tags (each tag lowers the
   minimum by 4 bytes).  It is a compile time constant when tag_cnt is
   one.  The caller promises tag_cnt is in [0,11].  Software payloads
   smaller than this typically get zero padded up to this size by
   hardware under the hood.  FD_ETH_PAYLOAD_MIN(0) is
   FD_ETH_PAYLOAD_MIN_RAW. */

#define FD_ETH_PAYLOAD_MAX          (1500UL)
#define FD_ETH_PAYLOAD_MIN_RAW      (46UL)
#define FD_ETH_PAYLOAD_MIN(tag_cnt) (FD_ETH_PAYLOAD_MIN_RAW-((ulong)(tag_cnt))*4UL)
129 :
130 : /* Ethernet header */
131 :
132 : struct fd_eth_hdr {
133 : uchar dst[6]; /* Destination MAC address */
134 : uchar src[6]; /* Source MAC address */
135 : ushort net_type; /* Type of packet encapsulated, net order */
136 : };
137 :
138 : typedef struct fd_eth_hdr fd_eth_hdr_t;
139 :
/* FD_ETH_MAC_FMT is a printf style format string that pretty prints a
   MAC address as six colon separated, two digit, lower case hex
   values.  FD_ETH_MAC_FMT_ARGS(m) expands to the six matching
   arguments (m[0] through m[5], each cast to a uint).  m is evaluated
   six times and thus must be safe against multiple evaluation.
   Example usage:

     fd_eth_hdr_t * hdr = ...;
     FD_LOG_NOTICE(( "DST MAC: " FD_ETH_MAC_FMT, FD_ETH_MAC_FMT_ARGS( hdr->dst ) )); */

#define FD_ETH_MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"

#define FD_ETH_MAC_FMT_ARGS(m)                    \
  (uint)((m)[0]), (uint)((m)[1]), (uint)((m)[2]), \
  (uint)((m)[3]), (uint)((m)[4]), (uint)((m)[5])
149 :
150 : /* FIXME: CONSIDER PRETTY PRINTERS FOR THE WHOLE HDR? */
151 :
152 : /* VLAN tag */
153 :
154 : struct fd_vlan_tag {
155 : ushort net_vid; /* [3-bit priority=0:7 ... 0 is lowest], [1-bit CFI=0], [12-bit VLAN tag], net order */
156 : ushort net_type; /* ethertype, net order */
157 : };
158 :
159 : typedef struct fd_vlan_tag fd_vlan_tag_t;
160 :
161 : /* FIXME: CONSIDER PRETTY PRINTERS FOR THE TAG? */
162 :
163 : FD_PROTOTYPES_BEGIN
164 :
165 : /* fd_eth_mac_is_{mcast,local,bcast,ip4_mcast} test if a mac address is:
166 : mcast: multicast (broadcast and ip4 multicast are special cases)
167 : local: locally administered
168 : bcast: Ethernet broadcast (implies mcast, implies local, implies not ip4_mcast)
169 : ip4_mcast: IP4 multicast (implies mcast, implies not local, implies not bcast) */
170 :
171 9 : FD_FN_PURE static inline int fd_eth_mac_is_mcast( uchar const * mac ) { return !!(((uint)mac[0]) & 1U); }
172 :
173 9 : FD_FN_PURE static inline int fd_eth_mac_is_local( uchar const * mac ) { return !!(((uint)mac[0]) & 2U); }
174 :
175 : FD_FN_PURE static inline int
176 9 : fd_eth_mac_is_bcast( uchar const * mac ) {
177 9 : return (fd_ulong_load_4_fast( mac ) + fd_ulong_load_2_fast( mac+4 ))==(0xffffffffUL + 0xffffUL);
178 9 : }
179 :
180 : FD_FN_PURE static inline int
181 9 : fd_eth_mac_is_ip4_mcast( uchar const * mac ) {
182 9 : return fd_ulong_load_3_fast( mac )==0x5e0001UL;
183 9 : }
184 :
185 : /* fd_eth_fcs / fd_eth_fcs_append compute / incrementally update the fcs
186 : of an ethernet frame. That is, if buf points to the bytes of an
187 : ethernet frame containing sz bytes (first byte of the ethernet header
188 : to the last byte of the ethernet payload inclusive), the ethernet fcs
189 : can be computed and appended to buf via something like:
190 :
191 : fcs = fd_eth_fcs( buf, sz );
192 : *((uint *)(buf+sz)) = fcs;
193 :
194 : (This assumes the platform is okay with potentially unaligned memory
195 : accesses. The current implementation assumes a little endian
196 : platform as well but not too hard to make a variant for big endian
197 : platforms if necessary).
198 :
199 : This calculation can be done incrementally if useful. E.g.:
200 :
201 : fcs = fd_eth_fcs ( part1, part1_sz ); // or fd_eth_fcs_append( FD_ETH_FCS_APPEND_SEED, part1, part1_sz )
202 : fcs = fd_eth_fcs_append( fcs, part2, part2_sz );
203 : ...
204 : fcs = fd_eth_fcs_append( fcs, partn, partn_sz );
205 :
206 : yields the same result as:
207 :
208 : fcs = fd_eth_fcs( buf, sz )
209 :
210 : if buf/sz are the concatenation with no padding of the parts.
211 :
212 : The FCS computation under the hood is the IEEE802.3 crc32. This
213 : currently is not a particularly fast implementation (byte at a time
214 : table lookup based) nor a particularly good hash function
215 : theoretically. Rather, this is here for the rare application that
216 : needs to manually compute / validate an Ethernet FCS. */
217 :
218 : FD_FN_PURE uint
219 : fd_eth_fcs_append( uint fcs,
220 : void const * buf,
221 : ulong sz );
222 :
223 : FD_FN_PURE static inline uint
224 : fd_eth_fcs( void const * buf,
225 6 : ulong sz ) {
226 6 : return fd_eth_fcs_append( (uint)FD_ETH_FCS_APPEND_SEED, buf, sz );
227 6 : }
228 :
229 : /* fd_eth_mac_ip4_mcast populates the 6 byte memory region whose first
230 : byte is pointed to by mac with the Ethernet MAC address corresponding
231 : to the given multicast IP4 addr in ip4_addr_mcast (i.e. x.y.z.w where
232 : the caller promises that x is in [224,239] and given such that x is
233 : in bits 0:7, y is in bits 8:15, z is in bits 16:23, w is in bits
234 : 24:31 ... exactly how they would be if read directly from an IP hdr
235 : into a uint on this platform). Returns mac. */
236 :
237 : static inline uchar *
238 : fd_eth_mac_ip4_mcast( uchar * mac,
239 3 : uint ip4_addr_mcast ) {
240 3 : FD_STORE( uint, mac, 0x5e0001U | (((ip4_addr_mcast >> 8) & 0x7fU) << 24) );
241 3 : FD_STORE( ushort, mac+4, (ushort)((ip4_addr_mcast >> 16) & 0xffffU) );
242 3 : return mac;
243 3 : }
244 :
245 : /* fd_eth_mac_bcast populates the 6 byte memory region whose first byte
246 : is pointed to by mac with the Ethernet MAC address corresponding to
247 : LAN broadcast. Returns dst. */
248 :
249 : static inline uchar *
250 3 : fd_eth_mac_bcast( uchar * mac ) {
251 3 : FD_STORE( uint, mac, 0xffffffffU );
252 3 : FD_STORE( ushort, mac+4, (ushort)0xffff );
253 3 : return mac;
254 3 : }
255 :
256 : /* fd_eth_mac_cpy populates the 6 byte memory region whose first byte is
257 : pointed to by mac with the MAC address pointed whose first byte is
258 : pointed to by _mac. mac should either not overlap or overlap with
259 : mac <= _mac. Overlap with mac > _mac is not supported. Returns mac. */
260 :
261 : static inline uchar *
262 : fd_eth_mac_cpy( uchar * mac,
263 3 : uchar const * _mac ) {
264 3 : FD_STORE( uint, mac, FD_LOAD( uint, _mac ) );
265 3 : FD_STORE( ushort, mac+4, FD_LOAD( ushort, _mac+4 ) );
266 3 : return mac;
267 3 : }
268 :
269 : /* fd_vlan_tag populates the memory region of size sizeof(fd_vlan_tag_t)
270 : and whose first byte is pointed to by the non-NULL _tag into a vlan
271 : tag for vlan vid and the given type with 0 priority and 0 CFI
272 : (priority and CFI are meant for router side use typically). Returns
273 : _tag. FIXME: OPTIMIZE BSWAPS? */
274 :
275 : static inline fd_vlan_tag_t *
276 : fd_vlan_tag( void * _tag,
277 : ushort vid, /* Assumed in [0,4095], host order */
278 3 : ushort type ) { /* What follows this tag? */
279 3 : fd_vlan_tag_t * tag = (fd_vlan_tag_t *)_tag;
280 : /* FIXME: USE FD_STORE? */
281 3 : tag->net_vid = fd_ushort_bswap( vid );
282 3 : tag->net_type = fd_ushort_bswap( type );
283 3 : return tag;
284 3 : }
285 :
286 : /* fd_cstr_to_mac_addr parses a MAC address matching format
287 : FD_ETH_MAC_FMT from the given cstr and stores the result into mac.
288 : On success returns mac. On failure, returns NULL and leaves mac in
289 : an undefined state. On success, exactly 17 characters of s were
290 : processed. */
291 :
292 : uchar *
293 : fd_cstr_to_mac_addr( char const * s,
294 : uchar * mac );
295 :
296 : FD_PROTOTYPES_END
297 :
298 : #endif /* HEADER_fd_src_util_net_fd_eth_h */
|