LCOV - code coverage report
Current view: top level - util/net - fd_eth.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 34 35 97.1 %
Date: 2024-11-13 11:58:15 Functions: 9 351 2.6 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_net_fd_eth_h
       2             : #define HEADER_fd_src_util_net_fd_eth_h
       3             : 
       4             : #include "../bits/fd_bits.h"
       5             : 
       6             : /* Host side ethernet protocol crash course:
       7             : 
       8             :    In terms of logical bytes on the wire, a non-Jumbo normal ethernet
       9             :    packet looks like:
      10             : 
      11             :               |     | <- 4B*TAG_CNT --> | <------ at most 1500B -------> |     |
      12             :               |     |      |     |      |                                |     |
      13             :               | eth | vlan |     | vlan |              eth               |     |
      14             :      preamble | hdr | tag  |     | tag  |            payload             | fcs | ifg
      15             :        ~8B    | 14B |  4B  | ... |  4B  | [max(0B,46B-4B*TAG_CNT),1500B] | 4B  | ~12B
      16             :               |                                                          |     |
      17             :               |               what hardware typically shows              |     |
      18             :               | <---------- at most 14B + 4B*TAG_CNT + 1500B ----------> |     |
      19             :               |                                                                |
      20             :               | <----------------------- at least 64B -----------------------> |
      21             : 
      22             :    The preamble (an oscillatory bit pattern), FCS (frame check sequence
      23             :    / CRC / cyclic redundancy check) and IFG (interframe gap / quiet
      24             :    time) are usually not shown to threads receiving a packet from an
      25             :    Ethernet LAN (these are historically for helping synchronize a
      26             :    receiver with a sender on receipt of a packet from the sender and
      27             :    validating a packet was received correctly with reasonably high
      28             :    probability).  Packets with a bad FCS may or may not be shown to an
      29             :    application depending on the hardware, interface and how it was
      30             :    configured.
      31             : 
      32             :    Threads sending packets usually do not worry about the preamble, FCS
      33             :    or IFG.  These are typically stripped by the underlying hardware on
      34             :    receive and inserted in the appropriate locations on send.  Just as
      35             :    often, hardware given a "runt" payload to send (smaller than the
      36             :    minimum size above) will zero pad the payload to the minimal payload
      37             :    size and the fcs will cover this zero padding too.
      38             : 
      39             :    VLAN tags have an unfortunately far wider range of behaviors in the
      40             :    wild due to the rather messy set of protocols that have accumulated
      41             :    over the decades.
      42             : 
      43             :    TAG_CNT can only be determined by parsing the packet headers.  0 (raw
      44             :    Ethernet) or 1 (VLAN tagged ethernet) are common but there isn't an
      45             :    obvious theoretical upper limit to TAG_CNT (nobody seems to have
      46             :    seriously thought about it).  For example, Q-in-Q network
      47             :    configs and/or various capture devices might insert additional tags
      48             :    to further decorate a packet.  Thus, 2 vlan tags isn't unheard of
      49             :    (e.g. Q-in-Q or a capture device adding a tag to vlan tagged
      50             :    ethernet indicating timestamp info has been provided for the packet
      51             :    somehow) or even 3 (e.g. capture device tagging a 2 vlan tag packet).
      52             : 
      53             :    Similarly, hardware might insert or strip VLAN tags behind a thread's
      54             :    back depending on the network, hardware, interface and how it was
      55             :    configured.  And different hardware devices and hardware-software
      56             :    interfaces have ideas as to what applications should be exposed to.
      57             :    And depending on where a packet is inspected, it might have a different
      58             :    number of tags.
      59             : 
      60             :    As a practical matter, most applications have some set of VLAN tag
      61             :    behaviors they understand / expect for the combination of LAN, WAN,
      62             :    NIC and interface they support.  Often this is implicit / evolved as
      63             :    most application devs are blissfully unaware of all this.  E.g. the
      64             :    maximum number of VLAN tags they can handle is implicitly bounded by
      65             :    their buffer sizes / buffer management, their range of expected
      66             :    behaviors is bounded by what worked in testing on their combination
      67             :    of lab hardware and equipment, etc.
      68             : 
      69             :    MAC addresses have 6 bytes.  Bit 0 in byte 0 indicates whether the
      70             :    MAC address is multicast or unicast.  Bit 1 in byte 0 indicates
      71             :    whether or not the MAC address is locally administered.  For standard
      72             :    unicast MAC addresses (not locally admin'd), the first 3 bytes encode
      73             :    an OUI (organizationally unique identifier).  The last 3 bytes are
      74             :    then assigned by the organization to hardware such that Ethernet
      75             :    conformant hardware will all have globally unique MAC addresses.  Most
      76             :    anything goes for locally admin'd addresses.  Additional notes are
      77             :    below.
      78             : 
      79             :    It is notable that:
      80             : 
      81             :    - The FCS does not protect against header or VLAN tag corruption.
      82             :      Applications, even in non-malicious scenarios, can not assume the
      83             :      headers are valid.  In most non-malicious scenarios though,
      84             :      applications can assume that corrupted headers are reasonably
      85             :      rare and thus need not be optimized.  In some non-malicious
      86             :      scenarios though, header corruption is common enough to warrant
      87             :      optimized handling.
      88             : 
      89             :    - Routing and flow steering mechanisms for Ethernet tend to not be
      90             :      precise.  That is, applications should not assume they will only
      91             :      receive packets they care about.  Like the above, in non-malicious
      92             :      scenarios, applications usually can assume they will mostly receive
      93             :      packets they care about and that the record of packets they care
      94             :      about is reasonably complete (such that they don't need to worry
      95             :      about optimizing irrelevant packet filtering or optimizing for drop
      96             :      recovery).  There are notable non-malicious exceptions though.
      97             : 
      98             :    - The FCS does not provide sufficiently strong protection against
      99             :      invalid packet receipt in many modern real world scenarios, even
     100             :      non-malicious.  Various combinations of high bandwidth links, large
     101             :      number of senders, large number of receivers and high BER links
     102             :      create situations where corrupted payloads pass the FCS check and
     103             :      thus get exposed to the application.  Applications need to be able
     104             :      to detect and recover from this to their satisfaction.  In non-malicious
     105             :      scenarios, this rate tends to be low enough relative to the overall
     106             :      application packet rates so as to not require optimized handling
     107             :      (e.g. fast detect, treat as drop and use standard drop recovery
     108             :      mechanisms). */
     109             : 
     110    19425602 : #define FD_ETH_HDR_TYPE_IP   ((ushort)0x0800) /* (In host byte order) This hdr/tag is followed by an IP packet */
     111             : #define FD_ETH_HDR_TYPE_ARP  ((ushort)0x0806) /* (In host byte order) This hdr/tag is followed by an ARP packet */
     112             : #define FD_ETH_HDR_TYPE_VLAN ((ushort)0x8100) /* (In host byte order) This hdr/tag is followed by a VLAN tag */
     113             : 
     114           9 : #define FD_ETH_FCS_APPEND_SEED (0U) /* Seed to start an incremental fcs calculation */
     115             : 
     116             : /* FD_ETH_PAYLOAD_{MAX,MIN_RAW} return the appropriate payload size
     117             :    limits in bytes as a ulong for a normal untagged ethernet packet.
     118             : 
     119             :    FD_ETH_PAYLOAD_MIN returns the minimum size payload in bytes for an
     120             :    ethernet packet with tag_cnt vlan tags.  Should be a compile time
     121             :    const given compile time tag_cnt.  User promises tag_cnt is in
     122             :    [0,11].  Payloads smaller than this in software will get zero padded
     123             :    to this by hardware under the hood typically.  Note:
     124             :    FD_ETH_PAYLOAD_MIN(0)==FD_ETH_PAYLOAD_MIN_RAW. */
     125             : 
     126           0 : #define FD_ETH_PAYLOAD_MAX          (1500UL)
     127             : #define FD_ETH_PAYLOAD_MIN_RAW      (46UL)
     128             : #define FD_ETH_PAYLOAD_MIN(tag_cnt) (FD_ETH_PAYLOAD_MIN_RAW-4UL*(ulong)(tag_cnt))
     129             : 
     130             : /* Ethernet header */
     131             : 
     132             : struct fd_eth_hdr {
     133             :   uchar  dst[6];   /* Destination MAC address */
     134             :   uchar  src[6];   /* Source MAC address */
     135             :   ushort net_type; /* Type of packet encapsulated, net order */
     136             : };
     137             : 
     138             : typedef struct fd_eth_hdr fd_eth_hdr_t;
     139             : 
     140             : /* FD_ETH_MAC_FMT / FD_ETH_MAC_FMT_ARGS are used to pretty print a MAC
     141             :    address by a printf style formatter.  m must be safe against multiple
     142             :    evaluation.  Example usage:
     143             : 
     144             :      fd_eth_hdr_t * hdr = ...;
     145             :      FD_LOG_NOTICE(( "DST MAC: " FD_ETH_MAC_FMT, FD_ETH_MAC_FMT_ARGS( hdr->dst ) )); */
     146             : 
     147             : #define FD_ETH_MAC_FMT         "%02x:%02x:%02x:%02x:%02x:%02x"
     148             : #define FD_ETH_MAC_FMT_ARGS(m) (uint)((m)[0]), (uint)((m)[1]), (uint)((m)[2]), (uint)((m)[3]), (uint)((m)[4]), (uint)((m)[5])
     149             : 
     150             : /* FIXME: CONSIDER PRETTY PRINTERS FOR THE WHOLE HDR? */
     151             : 
     152             : /* VLAN tag */
     153             : 
     154             : struct fd_vlan_tag {
     155             :   ushort net_vid;  /* [3-bit priority=0:7 ... 0 is lowest], [1-bit CFI=0], [12-bit VLAN tag], net order */
     156             :   ushort net_type; /* ethertype, net order */
     157             : };
     158             : 
     159             : typedef struct fd_vlan_tag fd_vlan_tag_t;
     160             : 
     161             : /* FIXME: CONSIDER PRETTY PRINTERS FOR THE TAG? */
     162             : 
     163             : FD_PROTOTYPES_BEGIN
     164             : 
     165             : /* fd_eth_mac_is_{mcast,local,bcast,ip4_mcast} test if a mac address is:
     166             :      mcast:     multicast (broadcast and ip4 multicast are special cases)
     167             :      local:     locally administered
     168             :      bcast:     Ethernet broadcast (implies mcast, implies local, implies not ip4_mcast)
     169             :      ip4_mcast: IP4 multicast (implies mcast, implies not local, implies not bcast) */
     170             : 
     171           9 : FD_FN_PURE static inline int fd_eth_mac_is_mcast( uchar const * mac ) { return !!(((uint)mac[0]) & 1U); }
     172             : 
     173           9 : FD_FN_PURE static inline int fd_eth_mac_is_local( uchar const * mac ) { return !!(((uint)mac[0]) & 2U); }
     174             : 
     175             : FD_FN_PURE static inline int
     176           9 : fd_eth_mac_is_bcast( uchar const * mac ) {
     177           9 :   return (fd_ulong_load_4_fast( mac ) + fd_ulong_load_2_fast( mac+4 ))==(0xffffffffUL + 0xffffUL);
     178           9 : }
     179             : 
     180             : FD_FN_PURE static inline int
     181           9 : fd_eth_mac_is_ip4_mcast( uchar const * mac ) {
     182           9 :   return fd_ulong_load_3_fast( mac )==0x5e0001UL;
     183           9 : }
     184             : 
     185             : /* fd_eth_fcs / fd_eth_fcs_append compute / incrementally update the fcs
     186             :    of an ethernet frame.  That is, if buf points to the bytes of an
     187             :    ethernet frame containing sz bytes (first byte of the ethernet header
     188             :    to the last byte of the ethernet payload inclusive), the ethernet fcs
     189             :    can be computed and appended to buf via something like:
     190             : 
     191             :      fcs = fd_eth_fcs( buf, sz );
     192             :      *((uint *)(buf+sz)) = fcs;
     193             : 
     194             :    (This assumes the platform is okay with potentially unaligned memory
     195             :    accesses.  The current implementation assumes a little endian
     196             :    platform as well but not too hard to make a variant for big endian
     197             :    platforms if necessary).
     198             : 
     199             :    This calculation can be done incrementally if useful.  E.g.:
     200             : 
     201             :      fcs = fd_eth_fcs       (      part1, part1_sz ); // or fd_eth_fcs_append( FD_ETH_FCS_APPEND_SEED, part1, part1_sz )
     202             :      fcs = fd_eth_fcs_append( fcs, part2, part2_sz );
     203             :      ...
     204             :      fcs = fd_eth_fcs_append( fcs, partn, partn_sz );
     205             : 
     206             :    yields the same result as:
     207             : 
     208             :      fcs = fd_eth_fcs( buf, sz )
     209             : 
     210             :    if buf/sz are the concatenation with no padding of the parts.
     211             : 
     212             :    The FCS computation under the hood is the IEEE802.3 crc32.  This
     213             :    currently is not a particularly fast implementation (byte at a time
     214             :    table lookup based) nor a particularly good hash function
     215             :    theoretically.  Rather, this is here for the rare application that
     216             :    needs to manually compute / validate an Ethernet FCS. */
     217             : 
     218             : FD_FN_PURE uint
     219             : fd_eth_fcs_append( uint         fcs,
     220             :                    void const * buf,
     221             :                    ulong        sz );
     222             : 
     223             : FD_FN_PURE static inline uint
     224             : fd_eth_fcs( void const * buf,
     225           6 :             ulong        sz ) {
     226           6 :   return fd_eth_fcs_append( (uint)FD_ETH_FCS_APPEND_SEED, buf, sz );
     227           6 : }
     228             : 
     229             : /* fd_eth_mac_ip4_mcast populates the 6 byte memory region whose first
     230             :    byte is pointed to by mac with the Ethernet MAC address corresponding
     231             :    to the given multicast IP4 addr in ip4_addr_mcast (i.e. x.y.z.w where
     232             :    the caller promises that x is in [224,239] and given such that x is
     233             :    in bits 0:7, y is in bits 8:15, z is in bits 16:23, w is in bits
     234             :    24:31 ... exactly how they would be if read directly from an IP hdr
     235             :    into a uint on this platform).  Returns mac. */
     236             : 
     237             : static inline uchar *
     238             : fd_eth_mac_ip4_mcast( uchar * mac,
     239           3 :                       uint    ip4_addr_mcast ) {
     240           3 :   FD_STORE( uint,   mac,   0x5e0001U | (((ip4_addr_mcast >> 8) & 0x7fU) << 24) );
     241           3 :   FD_STORE( ushort, mac+4, (ushort)((ip4_addr_mcast >> 16) & 0xffffU)          );
     242           3 :   return mac;
     243           3 : }
     244             : 
     245             : /* fd_eth_mac_bcast populates the 6 byte memory region whose first byte
     246             :    is pointed to by mac with the Ethernet MAC address corresponding to
     247             :    LAN broadcast.  Returns mac. */
     248             : 
     249             : static inline uchar *
     250           3 : fd_eth_mac_bcast( uchar * mac ) {
     251           3 :   FD_STORE( uint,   mac,   0xffffffffU    );
     252           3 :   FD_STORE( ushort, mac+4, (ushort)0xffff );
     253           3 :   return mac;
     254           3 : }
     255             : 
     256             : /* fd_eth_mac_cpy populates the 6 byte memory region whose first byte is
     257             :    pointed to by mac with the MAC address pointed whose first byte is
     258             :    pointed to by _mac.  mac should either not overlap or overlap with
     259             :    mac <= _mac.  Overlap with mac > _mac is not supported.  Returns mac. */
     260             : 
     261             : static inline uchar *
     262             : fd_eth_mac_cpy( uchar       * mac,
     263           3 :                 uchar const * _mac ) {
     264           3 :   FD_STORE( uint,   mac,   FD_LOAD( uint,   _mac   ) );
     265           3 :   FD_STORE( ushort, mac+4, FD_LOAD( ushort, _mac+4 ) );
     266           3 :   return mac;
     267           3 : }
     268             : 
     269             : /* fd_vlan_tag populates the memory region of size sizeof(fd_vlan_tag_t)
     270             :    and whose first byte is pointed to by the non-NULL _tag into a vlan
     271             :    tag for vlan vid and the given type with 0 priority and 0 CFI
     272             :    (priority and CFI are meant for router side use typically).  Returns
     273             :    _tag.  FIXME: OPTIMIZE BSWAPS? */
     274             : 
     275             : static inline fd_vlan_tag_t *
     276             : fd_vlan_tag( void * _tag,
     277             :              ushort vid,     /* Assumed in [0,4095], host order */
     278           3 :              ushort type ) { /* What follows this tag? */
     279           3 :   fd_vlan_tag_t * tag = (fd_vlan_tag_t *)_tag;
     280             :   /* FIXME: USE FD_STORE? */
     281           3 :   tag->net_vid  = fd_ushort_bswap( vid  );
     282           3 :   tag->net_type = fd_ushort_bswap( type );
     283           3 :   return tag;
     284           3 : }
     285             : 
     286             : /* fd_cstr_to_mac_addr parses a MAC address matching format
     287             :    FD_ETH_MAC_FMT from the given cstr and stores the result into mac.
     288             :    On success returns mac.  On failure, returns NULL and leaves mac in
     289             :    an undefined state.  On success, exactly 17 characters of s were
     290             :    processed. */
     291             : 
     292             : uchar *
     293             : fd_cstr_to_mac_addr( char const * s,
     294             :                      uchar      * mac );
     295             : 
     296             : FD_PROTOTYPES_END
     297             : 
     298             : #endif /* HEADER_fd_src_util_net_fd_eth_h */

Generated by: LCOV version 1.14