linux/include/net/vxlan.h
<<
>>
Prefs
   1#ifndef __NET_VXLAN_H
   2#define __NET_VXLAN_H 1
   3
   4#include <linux/ip.h>
   5#include <linux/ipv6.h>
   6#include <linux/if_vlan.h>
   7#include <linux/skbuff.h>
   8#include <linux/netdevice.h>
   9#include <linux/udp.h>
  10#include <net/dst_metadata.h>
  11
  12/* VXLAN protocol (RFC 7348) header:
  13 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  14 * |R|R|R|R|I|R|R|R|               Reserved                        |
  15 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  16 * |                VXLAN Network Identifier (VNI) |   Reserved    |
  17 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  18 *
  19 * I = VXLAN Network Identifier (VNI) present.
  20 */
  21struct vxlanhdr {   /* on-wire layout: two 32-bit big-endian words (see diagram above) */
  22        __be32 vx_flags;        /* first word: flag bits (e.g. VXLAN_HF_VNI) plus reserved bits */
  23        __be32 vx_vni;          /* second word: 24-bit VNI in the upper bits, low byte reserved */
  24};
  25
  26/* VXLAN header flags. */
  27#define VXLAN_HF_VNI    cpu_to_be32(BIT(27))    /* "I" bit of vx_flags: VNI present */
  28
  29#define VXLAN_N_VID     (1u << 24)              /* number of distinct 24-bit VNI values */
  30#define VXLAN_VID_MASK  (VXLAN_N_VID - 1)       /* 0xffffff: low 24 bits */
  31#define VXLAN_VNI_MASK  cpu_to_be32(VXLAN_VID_MASK << 8)        /* VNI bits of vx_vni, as a __be32 */
  32#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))    /* UDP + VXLAN header bytes */
  33
  34#define VNI_HASH_BITS   10
  35#define VNI_HASH_SIZE   (1<<VNI_HASH_BITS)      /* bucket count of vxlan_sock.vni_list[] */
  36#define FDB_HASH_BITS   8
  37#define FDB_HASH_SIZE   (1<<FDB_HASH_BITS)      /* bucket count of vxlan_dev.fdb_head[] */
  38
  39/* Remote checksum offload for VXLAN (VXLAN_F_REMCSUM_[RT]X):
  40 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  41 * |R|R|R|R|I|R|R|R|R|R|C|              Reserved                   |
  42 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  43 * |           VXLAN Network Identifier (VNI)      |O| Csum start  |
  44 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  45 *
  46 * C = Remote checksum offload bit. When set indicates that the
  47 *     remote checksum offload data is present.
  48 *
  49 * O = Offset bit. Indicates the checksum offset relative to
  50 *     checksum start.
  51 *
  52 * Csum start = Checksum start divided by two.
  53 *
  54 * http://tools.ietf.org/html/draft-herbert-vxlan-rco
  55 */
  56
  57/* VXLAN-RCO header flags. */
  58#define VXLAN_HF_RCO    cpu_to_be32(BIT(21))    /* "C" bit of vx_flags: RCO data present */
  59
  60/* Remote checksum offload header option */
  61#define VXLAN_RCO_MASK  cpu_to_be32(0x7f)  /* Low 7 bits of the last byte of the vni field: Csum start */
  62#define VXLAN_RCO_UDP   cpu_to_be32(0x80)  /* Indicate UDP RCO (TCP when not set) */
  63#define VXLAN_RCO_SHIFT 1                  /* Left shift of start (start is stored divided by two) */
  64#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1)  /* low start bits that the shifted encoding drops */
  65#define VXLAN_MAX_REMCSUM_START (0x7f << VXLAN_RCO_SHIFT)  /* largest encodable start: 0x7f << 1 = 254 */
  66
  67/*
  68 * VXLAN Group Based Policy Extension (VXLAN_F_GBP):
  69 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  70 * |G|R|R|R|I|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
  71 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  72 * |                VXLAN Network Identifier (VNI) |   Reserved    |
  73 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  74 *
  75 * G = Group Policy ID present.
  76 *
  77 * D = Don't Learn bit. When set, this bit indicates that the egress
  78 *     VTEP MUST NOT learn the source address of the encapsulated frame.
  79 *
  80 * A = Indicates that the group policy has already been applied to
  81 *     this packet. Policies MUST NOT be applied by devices when the
  82 *     A bit is set.
  83 *
  84 * https://tools.ietf.org/html/draft-smith-vxlan-group-policy
  85 */
  86struct vxlanhdr_gbp {       /* same 8 bytes as struct vxlanhdr, with the first word split into fields */
  87        u8      vx_flags;       /* first header byte (holds the G and I bits in the diagram above) */
  88#ifdef __LITTLE_ENDIAN_BITFIELD
  89        u8      reserved_flags1:3,
  90                policy_applied:1,       /* "A" bit */
  91                reserved_flags2:2,
  92                dont_learn:1,           /* "D" bit */
  93                reserved_flags3:1;
  94#elif defined(__BIG_ENDIAN_BITFIELD)
  95        u8      reserved_flags1:1,
  96                dont_learn:1,           /* "D" bit */
  97                reserved_flags2:2,
  98                policy_applied:1,       /* "A" bit */
  99                reserved_flags3:3;
 100#else
 101#error  "Please fix <asm/byteorder.h>"
 102#endif
 103        __be16  policy_id;      /* Group Policy ID, big-endian */
 104        __be32  vx_vni;         /* second header word, same position as vxlanhdr.vx_vni */
 105};
 106
 107/* VXLAN-GBP header flags. */
 108#define VXLAN_HF_GBP    cpu_to_be32(BIT(31))    /* "G" bit of vx_flags: Group Policy ID present */
 109
 110#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | cpu_to_be32(0xFFFFFF))      /* G bit plus the low 24 bits of the flags word (second flag byte and policy ID) */
 111
 112/* skb->mark mapping
 113 *
 114 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 115 * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R|        Group Policy ID        |
 116 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 117 */
 118#define VXLAN_GBP_DONT_LEARN            (BIT(6) << 16)  /* "D" bit: bit 22 of the host-order value */
 119#define VXLAN_GBP_POLICY_APPLIED        (BIT(3) << 16)  /* "A" bit: bit 19 of the host-order value */
 120#define VXLAN_GBP_ID_MASK               (0xFFFF)        /* Group Policy ID: low 16 bits */
 121
 122struct vxlan_metadata {
 123        u32             gbp;    /* NOTE(review): presumably encoded per the skb->mark mapping above (VXLAN_GBP_* bits) -- confirm against users */
 124};
 125
 126/* per UDP socket information */
 127struct vxlan_sock {
 128        struct hlist_node hlist;
 129        struct work_struct del_work;
 130        struct socket    *sock;         /* underlying socket; vxlan_get_sk_family() reads sock->sk->sk_family */
 131        struct rcu_head   rcu;
 132        struct hlist_head vni_list[VNI_HASH_SIZE];      /* VNI_HASH_SIZE hash buckets */
 133        atomic_t          refcnt;
 134        struct udp_offload udp_offloads;
 135        u32               flags;        /* NOTE(review): presumably VXLAN_F_* bits (cf. VXLAN_F_RCV_FLAGS) -- confirm */
 136};
 137
 138union vxlan_addr {  /* one address, viewable through any of the three sockaddr types below */
 139        struct sockaddr_in sin;         /* IPv4 view */
 140        struct sockaddr_in6 sin6;       /* IPv6 view */
 141        struct sockaddr sa;             /* generic view */
 142};
 143
 144struct vxlan_rdst { /* destination record; vxlan_dev embeds one as its default_dst */
 145        union vxlan_addr         remote_ip;
 146        __be16                   remote_port;   /* big-endian port */
 147        __be32                   remote_vni;    /* big-endian VNI */
 148        u32                      remote_ifindex;
 149        struct list_head         list;
 150        struct rcu_head          rcu;
 151        struct dst_cache         dst_cache;
 152};
 153
 154struct vxlan_config {       /* configuration passed to vxlan_dev_create(); a copy is stored in vxlan_dev.cfg */
 155        union vxlan_addr        remote_ip;
 156        union vxlan_addr        saddr;
 157        __be32                  vni;            /* big-endian VNI */
 158        int                     remote_ifindex;
 159        int                     mtu;
 160        __be16                  dst_port;       /* big-endian port */
 161        u16                     port_min;       /* host-order; NOTE(review): presumably the source-port range bounds -- confirm */
 162        u16                     port_max;
 163        u8                      tos;
 164        u8                      ttl;
 165        __be32                  label;
 166        u32                     flags;          /* NOTE(review): presumably VXLAN_F_* bits -- confirm */
 167        unsigned long           age_interval;
 168        unsigned int            addrmax;
 169        bool                    no_share;
 170};
 171
 172/* Pseudo network device */
 173struct vxlan_dev {
 174        struct hlist_node hlist;        /* vni hash table */
 175        struct list_head  next;         /* vxlan's per namespace list */
 176        struct vxlan_sock *vn4_sock;    /* listening socket for IPv4 */
 177#if IS_ENABLED(CONFIG_IPV6)
 178        struct vxlan_sock *vn6_sock;    /* listening socket for IPv6 (member exists only with CONFIG_IPV6) */
 179#endif
 180        struct net_device *dev;
 181        struct net        *net;         /* netns for packet i/o */
 182        struct vxlan_rdst default_dst;  /* default destination */
 183        u32               flags;        /* VXLAN_F_* in vxlan.h */
 184
 185        struct timer_list age_timer;
 186        spinlock_t        hash_lock;
 187        unsigned int      addrcnt;
 188        struct gro_cells  gro_cells;
 189
 190        struct vxlan_config     cfg;    /* embedded configuration (see struct vxlan_config) */
 191
 192        struct hlist_head fdb_head[FDB_HASH_SIZE];      /* FDB_HASH_SIZE hash buckets */
 193};
 194
 195#define VXLAN_F_LEARN                   0x01    /* VXLAN_F_* are single-bit flags, OR-ed together in a u32 */
 196#define VXLAN_F_PROXY                   0x02
 197#define VXLAN_F_RSC                     0x04
 198#define VXLAN_F_L2MISS                  0x08
 199#define VXLAN_F_L3MISS                  0x10
 200#define VXLAN_F_IPV6                    0x20
 201#define VXLAN_F_UDP_ZERO_CSUM_TX        0x40
 202#define VXLAN_F_UDP_ZERO_CSUM6_TX       0x80
 203#define VXLAN_F_UDP_ZERO_CSUM6_RX       0x100
 204#define VXLAN_F_REMCSUM_TX              0x200   /* remote checksum offload, transmit side */
 205#define VXLAN_F_REMCSUM_RX              0x400   /* remote checksum offload, receive side */
 206#define VXLAN_F_GBP                     0x800   /* Group Based Policy extension */
 207#define VXLAN_F_REMCSUM_NOPARTIAL       0x1000
 208#define VXLAN_F_COLLECT_METADATA        0x2000
 209
 210/* Flags that are used in the receive path. These flags must match in
 211 * order for a socket to be shareable.
 212 */
 213#define VXLAN_F_RCV_FLAGS               (VXLAN_F_GBP |                  \
 214                                         VXLAN_F_UDP_ZERO_CSUM6_RX |    \
 215                                         VXLAN_F_REMCSUM_RX |           \
 216                                         VXLAN_F_REMCSUM_NOPARTIAL |    \
 217                                         VXLAN_F_COLLECT_METADATA)
 218
 /* Declaration only; the definition is not part of this header.
  * NOTE(review): presumably creates a VXLAN net_device called @name in @net,
  * configured from @conf -- confirm against the implementation.
  */
 219struct net_device *vxlan_dev_create(struct net *net, const char *name,
 220                                    u8 name_assign_type, struct vxlan_config *conf);
 221
 222static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
 223                                        unsigned short family)
 224{
 225#if IS_ENABLED(CONFIG_IPV6)
 226        if (family == AF_INET6)
 227                return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
 228#endif
 229        return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
 230}
 231
 232static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 233                                                     netdev_features_t features)
 234{
 235        u8 l4_hdr = 0;
 236
 237        if (!skb->encapsulation)
 238                return features;
 239
 240        switch (vlan_get_protocol(skb)) {
 241        case htons(ETH_P_IP):
 242                l4_hdr = ip_hdr(skb)->protocol;
 243                break;
 244        case htons(ETH_P_IPV6):
 245                l4_hdr = ipv6_hdr(skb)->nexthdr;
 246                break;
 247        default:
 248                return features;;
 249        }
 250
 251        if ((l4_hdr == IPPROTO_UDP) &&
 252            (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
 253             skb->inner_protocol != htons(ETH_P_TEB) ||
 254             (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
 255              sizeof(struct udphdr) + sizeof(struct vxlanhdr)) ||
 256             (skb->ip_summed != CHECKSUM_NONE &&
 257              !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto))))
 258                return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 259
 260        return features;
 261}
 262
 263/* IPv4 header (20) + UDP (8) + VXLAN (8) + Ethernet header (14) = 50 bytes */
 264#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
 265/* IPv6 header (40) + UDP (8) + VXLAN (8) + Ethernet header (14) = 70 bytes */
 266#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
 267
 268static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
 269{
 270        return (struct vxlanhdr *)(udp_hdr(skb) + 1);
 271}
 272
 273static inline __be32 vxlan_vni(__be32 vni_field)
 274{
 275#if defined(__BIG_ENDIAN)
 276        return (__force __be32)((__force u32)vni_field >> 8);
 277#else
 278        return (__force __be32)((__force u32)(vni_field & VXLAN_VNI_MASK) << 8);
 279#endif
 280}
 281
 282static inline __be32 vxlan_vni_field(__be32 vni)
 283{
 284#if defined(__BIG_ENDIAN)
 285        return (__force __be32)((__force u32)vni << 8);
 286#else
 287        return (__force __be32)((__force u32)vni >> 8);
 288#endif
 289}
 290
 291static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id)
 292{
 293#if defined(__BIG_ENDIAN)
 294        return (__force __be32)tun_id;
 295#else
 296        return (__force __be32)((__force u64)tun_id >> 32);
 297#endif
 298}
 299
 300static inline __be64 vxlan_vni_to_tun_id(__be32 vni)
 301{
 302#if defined(__BIG_ENDIAN)
 303        return (__force __be64)vni;
 304#else
 305        return (__force __be64)((u64)(__force u32)vni << 32);
 306#endif
 307}
 308
 309static inline size_t vxlan_rco_start(__be32 vni_field)
 310{
 311        return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
 312}
 313
 314static inline size_t vxlan_rco_offset(__be32 vni_field)
 315{
 316        return (vni_field & VXLAN_RCO_UDP) ?
 317                offsetof(struct udphdr, check) :
 318                offsetof(struct tcphdr, check);
 319}
 320
 321static inline __be32 vxlan_compute_rco(unsigned int start, unsigned int offset)
 322{
 323        __be32 vni_field = cpu_to_be32(start >> VXLAN_RCO_SHIFT);
 324
 325        if (offset == offsetof(struct udphdr, check))
 326                vni_field |= VXLAN_RCO_UDP;
 327        return vni_field;
 328}
 329
 330#if IS_ENABLED(CONFIG_VXLAN)
 331void vxlan_get_rx_port(struct net_device *netdev);      /* defined outside this header */
 332#else
 333static inline void vxlan_get_rx_port(struct net_device *netdev)        /* no-op stub when CONFIG_VXLAN is not enabled */
 334{
 335}
 336#endif
 337
 338static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
 339{
 340        return vs->sock->sk->sk_family;
 341}
 342
 343#endif
 344