linux/include/net/vxlan.h
<<
>>
Prefs
   1#ifndef __NET_VXLAN_H
   2#define __NET_VXLAN_H 1
   3
   4#include <linux/if_vlan.h>
   5#include <net/udp_tunnel.h>
   6#include <net/dst_metadata.h>
   7#include <net/udp_tunnel.h>
   8
   9/* VXLAN protocol (RFC 7348) header:
  10 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  11 * |R|R|R|R|I|R|R|R|               Reserved                        |
  12 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  13 * |                VXLAN Network Identifier (VNI) |   Reserved    |
  14 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  15 *
  16 * I = VXLAN Network Identifier (VNI) present.
  17 */
  18struct vxlanhdr {
  19        __be32 vx_flags;
  20        __be32 vx_vni;
  21};
  22
  23/* VXLAN header flags. */
  24#define VXLAN_HF_VNI    cpu_to_be32(BIT(27))
  25
  26#define VXLAN_N_VID     (1u << 24)
  27#define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
  28#define VXLAN_VNI_MASK  cpu_to_be32(VXLAN_VID_MASK << 8)
  29#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
  30
  31#define VNI_HASH_BITS   10
  32#define VNI_HASH_SIZE   (1<<VNI_HASH_BITS)
  33#define FDB_HASH_BITS   8
  34#define FDB_HASH_SIZE   (1<<FDB_HASH_BITS)
  35
  36/* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X):
  37 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  38 * |R|R|R|R|I|R|R|R|R|R|C|              Reserved                   |
  39 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  40 * |           VXLAN Network Identifier (VNI)      |O| Csum start  |
  41 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  42 *
  43 * C = Remote checksum offload bit. When set indicates that the
  44 *     remote checksum offload data is present.
  45 *
  46 * O = Offset bit. Indicates the checksum offset relative to
  47 *     checksum start.
  48 *
  49 * Csum start = Checksum start divided by two.
  50 *
  51 * http://tools.ietf.org/html/draft-herbert-vxlan-rco
  52 */
  53
  54/* VXLAN-RCO header flags. */
  55#define VXLAN_HF_RCO    cpu_to_be32(BIT(21))
  56
  57/* Remote checksum offload header option */
  58#define VXLAN_RCO_MASK  cpu_to_be32(0x7f)  /* Last byte of vni field */
  59#define VXLAN_RCO_UDP   cpu_to_be32(0x80)  /* Indicate UDP RCO (TCP when not set *) */
  60#define VXLAN_RCO_SHIFT 1                  /* Left shift of start */
  61#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1)
  62#define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT)
  63
  64/*
  65 * VXLAN Group Based Policy Extension (VXLAN_F_GBP):
  66 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  67 * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
  68 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  69 * |                VXLAN Network Identifier (VNI) |   Reserved    |
  70 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  71 *
  72 * G = Group Policy ID present.
  73 *
  74 * D = Don't Learn bit. When set, this bit indicates that the egress
  75 *     VTEP MUST NOT learn the source address of the encapsulated frame.
  76 *
  77 * A = Indicates that the group policy has already been applied to
  78 *     this packet. Policies MUST NOT be applied by devices when the
  79 *     A bit is set.
  80 *
  81 * https://tools.ietf.org/html/draft-smith-vxlan-group-policy
  82 */
  83struct vxlanhdr_gbp {
  84        u8      vx_flags;
  85#ifdef __LITTLE_ENDIAN_BITFIELD
  86        u8      reserved_flags1:3,
  87                policy_applied:1,
  88                reserved_flags2:2,
  89                dont_learn:1,
  90                reserved_flags3:1;
  91#elif defined(__BIG_ENDIAN_BITFIELD)
  92        u8      reserved_flags1:1,
  93                dont_learn:1,
  94                reserved_flags2:2,
  95                policy_applied:1,
  96                reserved_flags3:3;
  97#else
  98#error  "Please fix <asm/byteorder.h>"
  99#endif
 100        __be16  policy_id;
 101        __be32  vx_vni;
 102};
 103
 104/* VXLAN-GBP header flags. */
 105#define VXLAN_HF_GBP    cpu_to_be32(BIT(31))
 106
 107#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF))
 108
 109/* skb->mark mapping
 110 *
 111 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 112 * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
 113 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 114 */
 115#define VXLAN_GBP_DONT_LEARN            (BIT(6) << 16)
 116#define VXLAN_GBP_POLICY_APPLIED        (BIT(3) << 16)
 117#define VXLAN_GBP_ID_MASK               (0xFFFF)
 118
 119/*
 120 * VXLAN Generic Protocol Extension (VXLAN_F_GPE):
 121 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 122 * |R|R|Ver|I|P|R|O|       Reserved                |Next Protocol  |
 123 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 124 * |                VXLAN Network Identifier (VNI) |   Reserved    |
 125 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 126 *
 127 * Ver = Version. Indicates VXLAN GPE protocol version.
 128 *
 129 * P = Next Protocol Bit. The P bit is set to indicate that the
 130 *     Next Protocol field is present.
 131 *
 132 * O = OAM Flag Bit. The O bit is set to indicate that the packet
 133 *     is an OAM packet.
 134 *
 135 * Next Protocol = This 8 bit field indicates the protocol header
 136 * immediately following the VXLAN GPE header.
 137 *
 138 * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe-01
 139 */
 140
 141struct vxlanhdr_gpe {
 142#if defined(__LITTLE_ENDIAN_BITFIELD)
 143        u8      oam_flag:1,
 144                reserved_flags1:1,
 145                np_applied:1,
 146                instance_applied:1,
 147                version:2,
 148reserved_flags2:2;
 149#elif defined(__BIG_ENDIAN_BITFIELD)
 150        u8      reserved_flags2:2,
 151                version:2,
 152                instance_applied:1,
 153                np_applied:1,
 154                reserved_flags1:1,
 155                oam_flag:1;
 156#endif
 157        u8      reserved_flags3;
 158        u8      reserved_flags4;
 159        u8      next_protocol;
 160        __be32  vx_vni;
 161};
 162
 163/* VXLAN-GPE header flags. */
 164#define VXLAN_HF_VER    cpu_to_be32(BIT(29) | BIT(28))
 165#define VXLAN_HF_NP     cpu_to_be32(BIT(26))
 166#define VXLAN_HF_OAM    cpu_to_be32(BIT(24))
 167
 168#define VXLAN_GPE_USED_BITS (VXLAN_HF_VER | VXLAN_HF_NP | VXLAN_HF_OAM | \
 169                             cpu_to_be32(0xff))
 170
 171/* VXLAN-GPE header Next Protocol. */
 172#define VXLAN_GPE_NP_IPV4      0x01
 173#define VXLAN_GPE_NP_IPV6      0x02
 174#define VXLAN_GPE_NP_ETHERNET  0x03
 175#define VXLAN_GPE_NP_NSH       0x04
 176
 177struct vxlan_metadata {
 178        u32             gbp;
 179};
 180
 181/* per UDP socket information */
 182struct vxlan_sock {
 183        struct hlist_node hlist;
 184        struct socket    *sock;
 185        struct hlist_head vni_list[VNI_HASH_SIZE];
 186        atomic_t          refcnt;
 187        u32               flags;
 188};
 189
 190union vxlan_addr {
 191        struct sockaddr_in sin;
 192        struct sockaddr_in6 sin6;
 193        struct sockaddr sa;
 194};
 195
 196struct vxlan_rdst {
 197        union vxlan_addr         remote_ip;
 198        __be16                   remote_port;
 199        __be32                   remote_vni;
 200        u32                      remote_ifindex;
 201        struct list_head         list;
 202        struct rcu_head          rcu;
 203        struct dst_cache         dst_cache;
 204};
 205
 206struct vxlan_config {
 207        union vxlan_addr        remote_ip;
 208        union vxlan_addr        saddr;
 209        __be32                  vni;
 210        int                     remote_ifindex;
 211        int                     mtu;
 212        __be16                  dst_port;
 213        u16                     port_min;
 214        u16                     port_max;
 215        u8                      tos;
 216        u8                      ttl;
 217        __be32                  label;
 218        u32                     flags;
 219        unsigned long           age_interval;
 220        unsigned int            addrmax;
 221        bool                    no_share;
 222};
 223
 224/* Pseudo network device */
 225struct vxlan_dev {
 226        struct hlist_node hlist;        /* vni hash table */
 227        struct list_head  next;         /* vxlan's per namespace list */
 228        struct vxlan_sock __rcu *vn4_sock;      /* listening socket for IPv4 */
 229#if IS_ENABLED(CONFIG_IPV6)
 230        struct vxlan_sock __rcu *vn6_sock;      /* listening socket for IPv6 */
 231#endif
 232        struct net_device *dev;
 233        struct net        *net;         /* netns for packet i/o */
 234        struct vxlan_rdst default_dst;  /* default destination */
 235        u32               flags;        /* VXLAN_F_* in vxlan.h */
 236
 237        struct timer_list age_timer;
 238        spinlock_t        hash_lock;
 239        unsigned int      addrcnt;
 240        struct gro_cells  gro_cells;
 241
 242        struct vxlan_config     cfg;
 243
 244        struct hlist_head fdb_head[FDB_HASH_SIZE];
 245};
 246
 247#define VXLAN_F_LEARN                   0x01
 248#define VXLAN_F_PROXY                   0x02
 249#define VXLAN_F_RSC                     0x04
 250#define VXLAN_F_L2MISS                  0x08
 251#define VXLAN_F_L3MISS                  0x10
 252#define VXLAN_F_IPV6                    0x20
 253#define VXLAN_F_UDP_ZERO_CSUM_TX        0x40
 254#define VXLAN_F_UDP_ZERO_CSUM6_TX       0x80
 255#define VXLAN_F_UDP_ZERO_CSUM6_RX       0x100
 256#define VXLAN_F_REMCSUM_TX              0x200
 257#define VXLAN_F_REMCSUM_RX              0x400
 258#define VXLAN_F_GBP                     0x800
 259#define VXLAN_F_REMCSUM_NOPARTIAL       0x1000
 260#define VXLAN_F_COLLECT_METADATA        0x2000
 261#define VXLAN_F_GPE                     0x4000
 262
 263/* Flags that are used in the receive path. These flags must match in
 264 * order for a socket to be shareable
 265 */
 266#define VXLAN_F_RCV_FLAGS               (VXLAN_F_GBP |                  \
 267                                         VXLAN_F_GPE |                  \
 268                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
 269                                         VXLAN_F_REMCSUM_RX |           \
 270                                         VXLAN_F_REMCSUM_NOPARTIAL |    \
 271                                         VXLAN_F_COLLECT_METADATA)
 272
 273/* Flags that can be set together with VXLAN_F_GPE. */
 274#define VXLAN_F_ALLOWED_GPE             (VXLAN_F_GPE |                  \
 275                                         VXLAN_F_IPV6 |                 \
 276                                         VXLAN_F_UDP_ZERO_CSUM_TX |     \
 277                                         VXLAN_F_UDP_ZERO_CSUM6_TX |    \
 278                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
 279                                         VXLAN_F_COLLECT_METADATA)
 280
 281struct net_device *vxlan_dev_create(struct net *net, const char *name,
 282                                    u8 name_assign_type, struct vxlan_config *conf);
 283
 284static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 285                                                     netdev_features_t features)
 286{
 287        u8 l4_hdr = 0;
 288
 289        if (!skb->encapsulation)
 290                return features;
 291
 292        switch (vlan_get_protocol(skb)) {
 293        case htons(ETH_P_IP):
 294                l4_hdr = ip_hdr(skb)->protocol;
 295                break;
 296        case htons(ETH_P_IPV6):
 297                l4_hdr = ipv6_hdr(skb)->nexthdr;
 298                break;
 299        default:
 300                return features;;
 301        }
 302
 303        if ((l4_hdr == IPPROTO_UDP) &&
 304            (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
 305             skb->inner_protocol != htons(ETH_P_TEB) ||
 306             (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
 307              sizeof(struct udphdr) + sizeof(struct vxlanhdr)) ||
 308             (skb->ip_summed != CHECKSUM_NONE &&
 309              !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto))))
 310                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 311
 312        return features;
 313}
 314
 315/* IP header + UDP + VXLAN + Ethernet header */
 316#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
 317/* IPv6 header + UDP + VXLAN + Ethernet header */
 318#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
 319
 320static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
 321{
 322        return (struct vxlanhdr *)(udp_hdr(skb) + 1);
 323}
 324
 325static inline __be32 vxlan_vni(__be32 vni_field)
 326{
 327#if defined(__BIG_ENDIAN)
 328        return (__force __be32)((__force u32)vni_field >> 8);
 329#else
 330        return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8);
 331#endif
 332}
 333
 334static inline __be32 vxlan_vni_field(__be32 vni)
 335{
 336#if defined(__BIG_ENDIAN)
 337        return (__force __be32)((__force u32)vni << 8);
 338#else
 339        return (__force __be32)((__force u32)vni >> 8);
 340#endif
 341}
 342
 343static inline size_t vxlan_rco_start(__be32 vni_field)
 344{
 345        return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
 346}
 347
 348static inline size_t vxlan_rco_offset(__be32 vni_field)
 349{
 350        return (vni_field & VXLAN_RCO_UDP) ?
 351                offsetof(struct udphdr, check) :
 352                offsetof(struct tcphdr, check);
 353}
 354
 355static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset)
 356{
 357        __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT);
 358
 359        if (offset == offsetof(struct udphdr, check))
 360                vni_field |= VXLAN_RCO_UDP;
 361        return vni_field;
 362}
 363
 364static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
 365{
 366        return vs->sock->sk->sk_family;
 367}
 368
 369#endif
 370