linux/include/net/ip_tunnels.h
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef __NET_IP_TUNNELS_H
   3#define __NET_IP_TUNNELS_H 1
   4
   5#include <linux/if_tunnel.h>
   6#include <linux/netdevice.h>
   7#include <linux/skbuff.h>
   8#include <linux/socket.h>
   9#include <linux/types.h>
  10#include <linux/u64_stats_sync.h>
  11#include <linux/bitops.h>
  12
  13#include <net/dsfield.h>
  14#include <net/gro_cells.h>
  15#include <net/inet_ecn.h>
  16#include <net/netns/generic.h>
  17#include <net/rtnetlink.h>
  18#include <net/lwtunnel.h>
  19#include <net/dst_cache.h>
  20
  21#if IS_ENABLED(CONFIG_IPV6)
  22#include <net/ipv6.h>
  23#include <net/ip6_fib.h>
  24#include <net/ip6_route.h>
  25#endif
  26
  27/* Keep error state on tunnel for 30 sec */
  28#define IPTUNNEL_ERR_TIMEO      (30*HZ)
  29
  30/* Used to memset ip_tunnel padding. */
  31#define IP_TUNNEL_KEY_SIZE      offsetofend(struct ip_tunnel_key, tp_dst)
  32
  33/* Used to memset ipv4 address padding. */
  34#define IP_TUNNEL_KEY_IPV4_PAD  offsetofend(struct ip_tunnel_key, u.ipv4.dst)
  35#define IP_TUNNEL_KEY_IPV4_PAD_LEN                              \
  36        (FIELD_SIZEOF(struct ip_tunnel_key, u) -                \
  37         FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4))
  38
  39struct ip_tunnel_key {
  40        __be64                  tun_id;
  41        union {
  42                struct {
  43                        __be32  src;
  44                        __be32  dst;
  45                } ipv4;
  46                struct {
  47                        struct in6_addr src;
  48                        struct in6_addr dst;
  49                } ipv6;
  50        } u;
  51        __be16                  tun_flags;
  52        u8                      tos;            /* TOS for IPv4, TC for IPv6 */
  53        u8                      ttl;            /* TTL for IPv4, HL for IPv6 */
  54        __be32                  label;          /* Flow Label for IPv6 */
  55        __be16                  tp_src;
  56        __be16                  tp_dst;
  57};
  58
  59/* Flags for ip_tunnel_info mode. */
  60#define IP_TUNNEL_INFO_TX       0x01    /* represents tx tunnel parameters */
  61#define IP_TUNNEL_INFO_IPV6     0x02    /* key contains IPv6 addresses */
  62#define IP_TUNNEL_INFO_BRIDGE   0x04    /* represents a bridged tunnel id */
  63
  64/* Maximum tunnel options length. */
  65#define IP_TUNNEL_OPTS_MAX                                      \
  66        GENMASK((FIELD_SIZEOF(struct ip_tunnel_info,            \
  67                              options_len) * BITS_PER_BYTE) - 1, 0)
  68
  69struct ip_tunnel_info {
  70        struct ip_tunnel_key    key;
  71#ifdef CONFIG_DST_CACHE
  72        struct dst_cache        dst_cache;
  73#endif
  74        u8                      options_len;
  75        u8                      mode;
  76};
  77
  78/* 6rd prefix/relay information */
  79#ifdef CONFIG_IPV6_SIT_6RD
  80struct ip_tunnel_6rd_parm {
  81        struct in6_addr         prefix;
  82        __be32                  relay_prefix;
  83        u16                     prefixlen;
  84        u16                     relay_prefixlen;
  85};
  86#endif
  87
  88struct ip_tunnel_encap {
  89        u16                     type;
  90        u16                     flags;
  91        __be16                  sport;
  92        __be16                  dport;
  93};
  94
  95struct ip_tunnel_prl_entry {
  96        struct ip_tunnel_prl_entry __rcu *next;
  97        __be32                          addr;
  98        u16                             flags;
  99        struct rcu_head                 rcu_head;
 100};
 101
 102struct metadata_dst;
 103
 104struct ip_tunnel {
 105        struct ip_tunnel __rcu  *next;
 106        struct hlist_node hash_node;
 107        struct net_device       *dev;
 108        struct net              *net;   /* netns for packet i/o */
 109
 110        unsigned long   err_time;       /* Time when the last ICMP error
 111                                         * arrived */
 112        int             err_count;      /* Number of arrived ICMP errors */
 113
 114        /* These four fields used only by GRE */
 115        u32             i_seqno;        /* The last seen seqno  */
 116        u32             o_seqno;        /* The last output seqno */
 117        int             tun_hlen;       /* Precalculated header length */
 118
 119        /* These four fields used only by ERSPAN */
 120        u32             index;          /* ERSPAN type II index */
 121        u8              erspan_ver;     /* ERSPAN version */
 122        u8              dir;            /* ERSPAN direction */
 123        u16             hwid;           /* ERSPAN hardware ID */
 124
 125        struct dst_cache dst_cache;
 126
 127        struct ip_tunnel_parm parms;
 128
 129        int             mlink;
 130        int             encap_hlen;     /* Encap header length (FOU,GUE) */
 131        int             hlen;           /* tun_hlen + encap_hlen */
 132        struct ip_tunnel_encap encap;
 133
 134        /* for SIT */
 135#ifdef CONFIG_IPV6_SIT_6RD
 136        struct ip_tunnel_6rd_parm ip6rd;
 137#endif
 138        struct ip_tunnel_prl_entry __rcu *prl;  /* potential router list */
 139        unsigned int            prl_count;      /* # of entries in PRL */
 140        unsigned int            ip_tnl_net_id;
 141        struct gro_cells        gro_cells;
 142        __u32                   fwmark;
 143        bool                    collect_md;
 144        bool                    ignore_df;
 145};
 146
 147#define TUNNEL_CSUM             __cpu_to_be16(0x01)
 148#define TUNNEL_ROUTING          __cpu_to_be16(0x02)
 149#define TUNNEL_KEY              __cpu_to_be16(0x04)
 150#define TUNNEL_SEQ              __cpu_to_be16(0x08)
 151#define TUNNEL_STRICT           __cpu_to_be16(0x10)
 152#define TUNNEL_REC              __cpu_to_be16(0x20)
 153#define TUNNEL_VERSION          __cpu_to_be16(0x40)
 154#define TUNNEL_NO_KEY           __cpu_to_be16(0x80)
 155#define TUNNEL_DONT_FRAGMENT    __cpu_to_be16(0x0100)
 156#define TUNNEL_OAM              __cpu_to_be16(0x0200)
 157#define TUNNEL_CRIT_OPT         __cpu_to_be16(0x0400)
 158#define TUNNEL_GENEVE_OPT       __cpu_to_be16(0x0800)
 159#define TUNNEL_VXLAN_OPT        __cpu_to_be16(0x1000)
 160#define TUNNEL_NOCACHE          __cpu_to_be16(0x2000)
 161#define TUNNEL_ERSPAN_OPT       __cpu_to_be16(0x4000)
 162
 163#define TUNNEL_OPTIONS_PRESENT \
 164                (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT | TUNNEL_ERSPAN_OPT)
 165
 166struct tnl_ptk_info {
 167        __be16 flags;
 168        __be16 proto;
 169        __be32 key;
 170        __be32 seq;
 171        int hdr_len;
 172};
 173
 174#define PACKET_RCVD     0
 175#define PACKET_REJECT   1
 176#define PACKET_NEXT     2
 177
 178#define IP_TNL_HASH_BITS   7
 179#define IP_TNL_HASH_SIZE   (1 << IP_TNL_HASH_BITS)
 180
 181struct ip_tunnel_net {
 182        struct net_device *fb_tunnel_dev;
 183        struct rtnl_link_ops *rtnl_link_ops;
 184        struct hlist_head tunnels[IP_TNL_HASH_SIZE];
 185        struct ip_tunnel __rcu *collect_md_tun;
 186        int type;
 187};
 188
 189static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
 190                                      __be32 saddr, __be32 daddr,
 191                                      u8 tos, u8 ttl, __be32 label,
 192                                      __be16 tp_src, __be16 tp_dst,
 193                                      __be64 tun_id, __be16 tun_flags)
 194{
 195        key->tun_id = tun_id;
 196        key->u.ipv4.src = saddr;
 197        key->u.ipv4.dst = daddr;
 198        memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD,
 199               0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
 200        key->tos = tos;
 201        key->ttl = ttl;
 202        key->label = label;
 203        key->tun_flags = tun_flags;
 204
 205        /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
 206         * the upper tunnel are used.
 207         * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
 208         */
 209        key->tp_src = tp_src;
 210        key->tp_dst = tp_dst;
 211
 212        /* Clear struct padding. */
 213        if (sizeof(*key) != IP_TUNNEL_KEY_SIZE)
 214                memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE,
 215                       0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
 216}
 217
 218static inline bool
 219ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
 220                           const struct ip_tunnel_info *info)
 221{
 222        if (skb->mark)
 223                return false;
 224        if (!info)
 225                return true;
 226        if (info->key.tun_flags & TUNNEL_NOCACHE)
 227                return false;
 228
 229        return true;
 230}
 231
 232static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
 233                                               *tun_info)
 234{
 235        return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
 236}
 237
 238static inline __be64 key32_to_tunnel_id(__be32 key)
 239{
 240#ifdef __BIG_ENDIAN
 241        return (__force __be64)key;
 242#else
 243        return (__force __be64)((__force u64)key << 32);
 244#endif
 245}
 246
 247/* Returns the least-significant 32 bits of a __be64. */
 248static inline __be32 tunnel_id_to_key32(__be64 tun_id)
 249{
 250#ifdef __BIG_ENDIAN
 251        return (__force __be32)tun_id;
 252#else
 253        return (__force __be32)((__force u64)tun_id >> 32);
 254#endif
 255}
 256
 257#ifdef CONFIG_INET
 258
 259static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
 260                                       int proto,
 261                                       __be32 daddr, __be32 saddr,
 262                                       __be32 key, __u8 tos, int oif,
 263                                       __u32 mark)
 264{
 265        memset(fl4, 0, sizeof(*fl4));
 266        fl4->flowi4_oif = oif;
 267        fl4->daddr = daddr;
 268        fl4->saddr = saddr;
 269        fl4->flowi4_tos = tos;
 270        fl4->flowi4_proto = proto;
 271        fl4->fl4_gre_key = key;
 272        fl4->flowi4_mark = mark;
 273}
 274
 275int ip_tunnel_init(struct net_device *dev);
 276void ip_tunnel_uninit(struct net_device *dev);
 277void  ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
 278struct net *ip_tunnel_get_link_net(const struct net_device *dev);
 279int ip_tunnel_get_iflink(const struct net_device *dev);
 280int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
 281                       struct rtnl_link_ops *ops, char *devname);
 282
 283void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
 284                           struct rtnl_link_ops *ops);
 285
 286void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 287                    const struct iphdr *tnl_params, const u8 protocol);
 288void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 289                       const u8 proto);
 290int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
 291int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 292int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 293
 294void ip_tunnel_get_stats64(struct net_device *dev,
 295                           struct rtnl_link_stats64 *tot);
 296struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 297                                   int link, __be16 flags,
 298                                   __be32 remote, __be32 local,
 299                                   __be32 key);
 300
 301int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
 302                  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
 303                  bool log_ecn_error);
 304int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
 305                         struct ip_tunnel_parm *p, __u32 fwmark);
 306int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 307                      struct ip_tunnel_parm *p, __u32 fwmark);
 308void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
 309
 310struct ip_tunnel_encap_ops {
 311        size_t (*encap_hlen)(struct ip_tunnel_encap *e);
 312        int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
 313                            u8 *protocol, struct flowi4 *fl4);
 314};
 315
 316#define MAX_IPTUN_ENCAP_OPS 8
 317
 318extern const struct ip_tunnel_encap_ops __rcu *
 319                iptun_encaps[MAX_IPTUN_ENCAP_OPS];
 320
 321int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,
 322                            unsigned int num);
 323int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
 324                            unsigned int num);
 325
 326int ip_tunnel_encap_setup(struct ip_tunnel *t,
 327                          struct ip_tunnel_encap *ipencap);
 328
 329static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
 330{
 331        const struct ip_tunnel_encap_ops *ops;
 332        int hlen = -EINVAL;
 333
 334        if (e->type == TUNNEL_ENCAP_NONE)
 335                return 0;
 336
 337        if (e->type >= MAX_IPTUN_ENCAP_OPS)
 338                return -EINVAL;
 339
 340        rcu_read_lock();
 341        ops = rcu_dereference(iptun_encaps[e->type]);
 342        if (likely(ops && ops->encap_hlen))
 343                hlen = ops->encap_hlen(e);
 344        rcu_read_unlock();
 345
 346        return hlen;
 347}
 348
 349static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
 350                                  u8 *protocol, struct flowi4 *fl4)
 351{
 352        const struct ip_tunnel_encap_ops *ops;
 353        int ret = -EINVAL;
 354
 355        if (t->encap.type == TUNNEL_ENCAP_NONE)
 356                return 0;
 357
 358        if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
 359                return -EINVAL;
 360
 361        rcu_read_lock();
 362        ops = rcu_dereference(iptun_encaps[t->encap.type]);
 363        if (likely(ops && ops->build_header))
 364                ret = ops->build_header(skb, &t->encap, protocol, fl4);
 365        rcu_read_unlock();
 366
 367        return ret;
 368}
 369
 370/* Extract dsfield from inner protocol */
 371static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
 372                                       const struct sk_buff *skb)
 373{
 374        if (skb->protocol == htons(ETH_P_IP))
 375                return iph->tos;
 376        else if (skb->protocol == htons(ETH_P_IPV6))
 377                return ipv6_get_dsfield((const struct ipv6hdr *)iph);
 378        else
 379                return 0;
 380}
 381
 382static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
 383                                       const struct sk_buff *skb)
 384{
 385        if (skb->protocol == htons(ETH_P_IP))
 386                return iph->ttl;
 387        else if (skb->protocol == htons(ETH_P_IPV6))
 388                return ((const struct ipv6hdr *)iph)->hop_limit;
 389        else
 390                return 0;
 391}
 392
 393/* Propogate ECN bits out */
 394static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
 395                                     const struct sk_buff *skb)
 396{
 397        u8 inner = ip_tunnel_get_dsfield(iph, skb);
 398
 399        return INET_ECN_encapsulate(tos, inner);
 400}
 401
 402int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
 403                           __be16 inner_proto, bool raw_proto, bool xnet);
 404
 405static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
 406                                       __be16 inner_proto, bool xnet)
 407{
 408        return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
 409}
 410
 411void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
 412                   __be32 src, __be32 dst, u8 proto,
 413                   u8 tos, u8 ttl, __be16 df, bool xnet);
 414struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
 415                                             gfp_t flags);
 416
 417int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
 418
 419static inline int iptunnel_pull_offloads(struct sk_buff *skb)
 420{
 421        if (skb_is_gso(skb)) {
 422                int err;
 423
 424                err = skb_unclone(skb, GFP_ATOMIC);
 425                if (unlikely(err))
 426                        return err;
 427                skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
 428                                               NETIF_F_GSO_SHIFT);
 429        }
 430
 431        skb->encapsulation = 0;
 432        return 0;
 433}
 434
 435static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
 436{
 437        if (pkt_len > 0) {
 438                struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
 439
 440                u64_stats_update_begin(&tstats->syncp);
 441                tstats->tx_bytes += pkt_len;
 442                tstats->tx_packets++;
 443                u64_stats_update_end(&tstats->syncp);
 444                put_cpu_ptr(tstats);
 445        } else {
 446                struct net_device_stats *err_stats = &dev->stats;
 447
 448                if (pkt_len < 0) {
 449                        err_stats->tx_errors++;
 450                        err_stats->tx_aborted_errors++;
 451                } else {
 452                        err_stats->tx_dropped++;
 453                }
 454        }
 455}
 456
 457static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info)
 458{
 459        return info + 1;
 460}
 461
 462static inline void ip_tunnel_info_opts_get(void *to,
 463                                           const struct ip_tunnel_info *info)
 464{
 465        memcpy(to, info + 1, info->options_len);
 466}
 467
 468static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 469                                           const void *from, int len,
 470                                           __be16 flags)
 471{
 472        memcpy(ip_tunnel_info_opts(info), from, len);
 473        info->options_len = len;
 474        info->key.tun_flags |= flags;
 475}
 476
 477static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
 478{
 479        return (struct ip_tunnel_info *)lwtstate->data;
 480}
 481
 482DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
 483
 484/* Returns > 0 if metadata should be collected */
 485static inline int ip_tunnel_collect_metadata(void)
 486{
 487        return static_branch_unlikely(&ip_tunnel_metadata_cnt);
 488}
 489
 490void __init ip_tunnel_core_init(void);
 491
 492void ip_tunnel_need_metadata(void);
 493void ip_tunnel_unneed_metadata(void);
 494
 495#else /* CONFIG_INET */
 496
 497static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
 498{
 499        return NULL;
 500}
 501
 502static inline void ip_tunnel_need_metadata(void)
 503{
 504}
 505
 506static inline void ip_tunnel_unneed_metadata(void)
 507{
 508}
 509
 510static inline void ip_tunnel_info_opts_get(void *to,
 511                                           const struct ip_tunnel_info *info)
 512{
 513}
 514
 515static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
 516                                           const void *from, int len,
 517                                           __be16 flags)
 518{
 519        info->options_len = 0;
 520        info->key.tun_flags |= flags;
 521}
 522
 523#endif /* CONFIG_INET */
 524
 525#endif /* __NET_IP_TUNNELS_H */
 526