linux/net/ipv4/ip_gre.c
<<
>>
Prefs
   1/*
   2 *      Linux NET3:     GRE over IP protocol decoder.
   3 *
   4 *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
   5 *
   6 *      This program is free software; you can redistribute it and/or
   7 *      modify it under the terms of the GNU General Public License
   8 *      as published by the Free Software Foundation; either version
   9 *      2 of the License, or (at your option) any later version.
  10 *
  11 */
  12
  13#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  14
  15#include <linux/capability.h>
  16#include <linux/module.h>
  17#include <linux/types.h>
  18#include <linux/kernel.h>
  19#include <linux/slab.h>
  20#include <linux/uaccess.h>
  21#include <linux/skbuff.h>
  22#include <linux/netdevice.h>
  23#include <linux/in.h>
  24#include <linux/tcp.h>
  25#include <linux/udp.h>
  26#include <linux/if_arp.h>
  27#include <linux/if_vlan.h>
  28#include <linux/init.h>
  29#include <linux/in6.h>
  30#include <linux/inetdevice.h>
  31#include <linux/igmp.h>
  32#include <linux/netfilter_ipv4.h>
  33#include <linux/etherdevice.h>
  34#include <linux/if_ether.h>
  35
  36#include <net/sock.h>
  37#include <net/ip.h>
  38#include <net/icmp.h>
  39#include <net/protocol.h>
  40#include <net/ip_tunnels.h>
  41#include <net/arp.h>
  42#include <net/checksum.h>
  43#include <net/dsfield.h>
  44#include <net/inet_ecn.h>
  45#include <net/xfrm.h>
  46#include <net/net_namespace.h>
  47#include <net/netns/generic.h>
  48#include <net/rtnetlink.h>
  49#include <net/gre.h>
  50#include <net/dst_metadata.h>
  51#include <net/erspan.h>
  52
  53/*
  54   Problems & solutions
  55   --------------------
  56
  57   1. The most important issue is detecting local dead loops.
  58   They would cause complete host lockup in transmit, which
  59   would be "resolved" by stack overflow or, if queueing is enabled,
  60   with infinite looping in net_bh.
  61
  62   We cannot track such dead loops during route installation,
  63   it is infeasible task. The most general solutions would be
  64   to keep skb->encapsulation counter (sort of local ttl),
  65   and silently drop packet when it expires. It is a good
  66   solution, but it supposes maintaining new variable in ALL
  67   skb, even if no tunneling is used.
  68
  69   Current solution: xmit_recursion breaks dead loops. This is a percpu
  70   counter, since when we enter the first ndo_xmit(), cpu migration is
  71   forbidden. We force an exit if this counter reaches RECURSION_LIMIT
  72
  73   2. Networking dead loops would not kill routers, but would really
  74   kill network. IP hop limit plays role of "t->recursion" in this case,
  75   if we copy it from packet being encapsulated to upper header.
  76   It is very good solution, but it introduces two problems:
  77
  78   - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
  79     do not work over tunnels.
  80   - traceroute does not work. I planned to relay ICMP from tunnel,
  81     so that this problem would be solved and traceroute output
   82     would be even more informative. This idea appeared to be wrong:
  83     only Linux complies to rfc1812 now (yes, guys, Linux is the only
  84     true router now :-)), all routers (at least, in neighbourhood of mine)
  85     return only 8 bytes of payload. It is the end.
  86
  87   Hence, if we want that OSPF worked or traceroute said something reasonable,
  88   we should search for another solution.
  89
  90   One of them is to parse packet trying to detect inner encapsulation
  91   made by our node. It is difficult or even impossible, especially,
   92   taking into account fragmentation. In short, ttl is not a solution at all.
  93
  94   Current solution: The solution was UNEXPECTEDLY SIMPLE.
  95   We force DF flag on tunnels with preconfigured hop limit,
  96   that is ALL. :-) Well, it does not remove the problem completely,
  97   but exponential growth of network traffic is changed to linear
  98   (branches, that exceed pmtu are pruned) and tunnel mtu
  99   rapidly degrades to value <68, where looping stops.
 100   Yes, it is not good if there exists a router in the loop,
 101   which does not force DF, even when encapsulating packets have DF set.
 102   But it is not our problem! Nobody could accuse us, we made
 103   all that we could make. Even if it is your gated who injected
 104   fatal route to network, even if it were you who configured
 105   fatal static route: you are innocent. :-)
 106
 107   Alexey Kuznetsov.
 108 */
 109
 110static bool log_ecn_error = true;
 111module_param(log_ecn_error, bool, 0644);
 112MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
 113
 114static struct rtnl_link_ops ipgre_link_ops __read_mostly;
 115static int ipgre_tunnel_init(struct net_device *dev);
 116static void erspan_build_header(struct sk_buff *skb,
 117                                u32 id, u32 index,
 118                                bool truncate, bool is_ipv4);
 119
 120static unsigned int ipgre_net_id __read_mostly;
 121static unsigned int gre_tap_net_id __read_mostly;
 122static unsigned int erspan_net_id __read_mostly;
 123
 124static void ipgre_err(struct sk_buff *skb, u32 info,
 125                      const struct tnl_ptk_info *tpi)
 126{
 127
 128        /* All the routers (except for Linux) return only
 129           8 bytes of packet payload. It means, that precise relaying of
 130           ICMP in the real Internet is absolutely infeasible.
 131
 132           Moreover, Cisco "wise men" put GRE key to the third word
 133           in GRE header. It makes impossible maintaining even soft
 134           state for keyed GRE tunnels with enabled checksum. Tell
 135           them "thank you".
 136
 137           Well, I wonder, rfc1812 was written by Cisco employee,
 138           what the hell these idiots break standards established
 139           by themselves???
 140           */
 141        struct net *net = dev_net(skb->dev);
 142        struct ip_tunnel_net *itn;
 143        const struct iphdr *iph;
 144        const int type = icmp_hdr(skb)->type;
 145        const int code = icmp_hdr(skb)->code;
 146        unsigned int data_len = 0;
 147        struct ip_tunnel *t;
 148
 149        switch (type) {
 150        default:
 151        case ICMP_PARAMETERPROB:
 152                return;
 153
 154        case ICMP_DEST_UNREACH:
 155                switch (code) {
 156                case ICMP_SR_FAILED:
 157                case ICMP_PORT_UNREACH:
 158                        /* Impossible event. */
 159                        return;
 160                default:
 161                        /* All others are translated to HOST_UNREACH.
 162                           rfc2003 contains "deep thoughts" about NET_UNREACH,
 163                           I believe they are just ether pollution. --ANK
 164                         */
 165                        break;
 166                }
 167                break;
 168
 169        case ICMP_TIME_EXCEEDED:
 170                if (code != ICMP_EXC_TTL)
 171                        return;
 172                data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
 173                break;
 174
 175        case ICMP_REDIRECT:
 176                break;
 177        }
 178
 179        if (tpi->proto == htons(ETH_P_TEB))
 180                itn = net_generic(net, gre_tap_net_id);
 181        else if (tpi->proto == htons(ETH_P_ERSPAN) ||
 182                 tpi->proto == htons(ETH_P_ERSPAN2))
 183                itn = net_generic(net, erspan_net_id);
 184        else
 185                itn = net_generic(net, ipgre_net_id);
 186
 187        iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
 188        t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 189                             iph->daddr, iph->saddr, tpi->key);
 190
 191        if (!t)
 192                return;
 193
 194#if IS_ENABLED(CONFIG_IPV6)
 195       if (tpi->proto == htons(ETH_P_IPV6) &&
 196           !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
 197                                       type, data_len))
 198               return;
 199#endif
 200
 201        if (t->parms.iph.daddr == 0 ||
 202            ipv4_is_multicast(t->parms.iph.daddr))
 203                return;
 204
 205        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 206                return;
 207
 208        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 209                t->err_count++;
 210        else
 211                t->err_count = 1;
 212        t->err_time = jiffies;
 213}
 214
 215static void gre_err(struct sk_buff *skb, u32 info)
 216{
 217        /* All the routers (except for Linux) return only
 218         * 8 bytes of packet payload. It means, that precise relaying of
 219         * ICMP in the real Internet is absolutely infeasible.
 220         *
 221         * Moreover, Cisco "wise men" put GRE key to the third word
 222         * in GRE header. It makes impossible maintaining even soft
 223         * state for keyed
 224         * GRE tunnels with enabled checksum. Tell them "thank you".
 225         *
 226         * Well, I wonder, rfc1812 was written by Cisco employee,
 227         * what the hell these idiots break standards established
 228         * by themselves???
 229         */
 230
 231        const struct iphdr *iph = (struct iphdr *)skb->data;
 232        const int type = icmp_hdr(skb)->type;
 233        const int code = icmp_hdr(skb)->code;
 234        struct tnl_ptk_info tpi;
 235
 236        if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
 237                             iph->ihl * 4) < 0)
 238                return;
 239
 240        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 241                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 242                                 skb->dev->ifindex, IPPROTO_GRE);
 243                return;
 244        }
 245        if (type == ICMP_REDIRECT) {
 246                ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
 247                              IPPROTO_GRE);
 248                return;
 249        }
 250
 251        ipgre_err(skb, info, &tpi);
 252}
 253
 254static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 255                      int gre_hdr_len)
 256{
 257        struct net *net = dev_net(skb->dev);
 258        struct metadata_dst *tun_dst = NULL;
 259        struct erspan_base_hdr *ershdr;
 260        struct erspan_metadata *pkt_md;
 261        struct ip_tunnel_net *itn;
 262        struct ip_tunnel *tunnel;
 263        const struct iphdr *iph;
 264        struct erspan_md2 *md2;
 265        int ver;
 266        int len;
 267
 268        itn = net_generic(net, erspan_net_id);
 269        len = gre_hdr_len + sizeof(*ershdr);
 270
 271        /* Check based hdr len */
 272        if (unlikely(!pskb_may_pull(skb, len)))
 273                return PACKET_REJECT;
 274
 275        iph = ip_hdr(skb);
 276        ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
 277        ver = ershdr->ver;
 278
 279        /* The original GRE header does not have key field,
 280         * Use ERSPAN 10-bit session ID as key.
 281         */
 282        tpi->key = cpu_to_be32(get_session_id(ershdr));
 283        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
 284                                  tpi->flags | TUNNEL_KEY,
 285                                  iph->saddr, iph->daddr, tpi->key);
 286
 287        if (tunnel) {
 288                len = gre_hdr_len + erspan_hdr_len(ver);
 289                if (unlikely(!pskb_may_pull(skb, len)))
 290                        return PACKET_REJECT;
 291
 292                ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
 293                pkt_md = (struct erspan_metadata *)(ershdr + 1);
 294
 295                if (__iptunnel_pull_header(skb,
 296                                           len,
 297                                           htons(ETH_P_TEB),
 298                                           false, false) < 0)
 299                        goto drop;
 300
 301                if (tunnel->collect_md) {
 302                        struct ip_tunnel_info *info;
 303                        struct erspan_metadata *md;
 304                        __be64 tun_id;
 305                        __be16 flags;
 306
 307                        tpi->flags |= TUNNEL_KEY;
 308                        flags = tpi->flags;
 309                        tun_id = key32_to_tunnel_id(tpi->key);
 310
 311                        tun_dst = ip_tun_rx_dst(skb, flags,
 312                                                tun_id, sizeof(*md));
 313                        if (!tun_dst)
 314                                return PACKET_REJECT;
 315
 316                        md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
 317                        md->version = ver;
 318                        md2 = &md->u.md2;
 319                        memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
 320                                                       ERSPAN_V2_MDSIZE);
 321
 322                        info = &tun_dst->u.tun_info;
 323                        info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
 324                        info->options_len = sizeof(*md);
 325                }
 326
 327                skb_reset_mac_header(skb);
 328                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 329                return PACKET_RCVD;
 330        }
 331        return PACKET_REJECT;
 332
 333drop:
 334        kfree_skb(skb);
 335        return PACKET_RCVD;
 336}
 337
 338static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 339                       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
 340{
 341        struct metadata_dst *tun_dst = NULL;
 342        const struct iphdr *iph;
 343        struct ip_tunnel *tunnel;
 344
 345        iph = ip_hdr(skb);
 346        tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 347                                  iph->saddr, iph->daddr, tpi->key);
 348
 349        if (tunnel) {
 350                if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
 351                                           raw_proto, false) < 0)
 352                        goto drop;
 353
 354                if (tunnel->dev->type != ARPHRD_NONE)
 355                        skb_pop_mac_header(skb);
 356                else
 357                        skb_reset_mac_header(skb);
 358                if (tunnel->collect_md) {
 359                        __be16 flags;
 360                        __be64 tun_id;
 361
 362                        flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
 363                        tun_id = key32_to_tunnel_id(tpi->key);
 364                        tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
 365                        if (!tun_dst)
 366                                return PACKET_REJECT;
 367                }
 368
 369                ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 370                return PACKET_RCVD;
 371        }
 372        return PACKET_NEXT;
 373
 374drop:
 375        kfree_skb(skb);
 376        return PACKET_RCVD;
 377}
 378
 379static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 380                     int hdr_len)
 381{
 382        struct net *net = dev_net(skb->dev);
 383        struct ip_tunnel_net *itn;
 384        int res;
 385
 386        if (tpi->proto == htons(ETH_P_TEB))
 387                itn = net_generic(net, gre_tap_net_id);
 388        else
 389                itn = net_generic(net, ipgre_net_id);
 390
 391        res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
 392        if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
 393                /* ipgre tunnels in collect metadata mode should receive
 394                 * also ETH_P_TEB traffic.
 395                 */
 396                itn = net_generic(net, ipgre_net_id);
 397                res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
 398        }
 399        return res;
 400}
 401
 402static int gre_rcv(struct sk_buff *skb)
 403{
 404        struct tnl_ptk_info tpi;
 405        bool csum_err = false;
 406        int hdr_len;
 407
 408#ifdef CONFIG_NET_IPGRE_BROADCAST
 409        if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
 410                /* Looped back packet, drop it! */
 411                if (rt_is_output_route(skb_rtable(skb)))
 412                        goto drop;
 413        }
 414#endif
 415
 416        hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
 417        if (hdr_len < 0)
 418                goto drop;
 419
 420        if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
 421                     tpi.proto == htons(ETH_P_ERSPAN2))) {
 422                if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 423                        return 0;
 424                goto out;
 425        }
 426
 427        if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
 428                return 0;
 429
 430out:
 431        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
 432drop:
 433        kfree_skb(skb);
 434        return 0;
 435}
 436
 437static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
 438                       const struct iphdr *tnl_params,
 439                       __be16 proto)
 440{
 441        struct ip_tunnel *tunnel = netdev_priv(dev);
 442
 443        if (tunnel->parms.o_flags & TUNNEL_SEQ)
 444                tunnel->o_seqno++;
 445
 446        /* Push GRE header. */
 447        gre_build_header(skb, tunnel->tun_hlen,
 448                         tunnel->parms.o_flags, proto, tunnel->parms.o_key,
 449                         htonl(tunnel->o_seqno));
 450
 451        ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
 452}
 453
 454static int gre_handle_offloads(struct sk_buff *skb, bool csum)
 455{
 456        return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 457}
 458
 459static struct rtable *gre_get_rt(struct sk_buff *skb,
 460                                 struct net_device *dev,
 461                                 struct flowi4 *fl,
 462                                 const struct ip_tunnel_key *key)
 463{
 464        struct net *net = dev_net(dev);
 465
 466        memset(fl, 0, sizeof(*fl));
 467        fl->daddr = key->u.ipv4.dst;
 468        fl->saddr = key->u.ipv4.src;
 469        fl->flowi4_tos = RT_TOS(key->tos);
 470        fl->flowi4_mark = skb->mark;
 471        fl->flowi4_proto = IPPROTO_GRE;
 472
 473        return ip_route_output_key(net, fl);
 474}
 475
 476static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
 477                                      struct net_device *dev,
 478                                      struct flowi4 *fl,
 479                                      int tunnel_hlen)
 480{
 481        struct ip_tunnel_info *tun_info;
 482        const struct ip_tunnel_key *key;
 483        struct rtable *rt = NULL;
 484        int min_headroom;
 485        bool use_cache;
 486        int err;
 487
 488        tun_info = skb_tunnel_info(skb);
 489        key = &tun_info->key;
 490        use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
 491
 492        if (use_cache)
 493                rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);
 494        if (!rt) {
 495                rt = gre_get_rt(skb, dev, fl, key);
 496                if (IS_ERR(rt))
 497                        goto err_free_skb;
 498                if (use_cache)
 499                        dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
 500                                          fl->saddr);
 501        }
 502
 503        min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
 504                        + tunnel_hlen + sizeof(struct iphdr);
 505        if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
 506                int head_delta = SKB_DATA_ALIGN(min_headroom -
 507                                                skb_headroom(skb) +
 508                                                16);
 509                err = pskb_expand_head(skb, max_t(int, head_delta, 0),
 510                                       0, GFP_ATOMIC);
 511                if (unlikely(err))
 512                        goto err_free_rt;
 513        }
 514        return rt;
 515
 516err_free_rt:
 517        ip_rt_put(rt);
 518err_free_skb:
 519        kfree_skb(skb);
 520        dev->stats.tx_dropped++;
 521        return NULL;
 522}
 523
 524static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 525                        __be16 proto)
 526{
 527        struct ip_tunnel *tunnel = netdev_priv(dev);
 528        struct ip_tunnel_info *tun_info;
 529        const struct ip_tunnel_key *key;
 530        struct rtable *rt = NULL;
 531        struct flowi4 fl;
 532        int tunnel_hlen;
 533        __be16 df, flags;
 534
 535        tun_info = skb_tunnel_info(skb);
 536        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 537                     ip_tunnel_info_af(tun_info) != AF_INET))
 538                goto err_free_skb;
 539
 540        key = &tun_info->key;
 541        tunnel_hlen = gre_calc_hlen(key->tun_flags);
 542
 543        rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
 544        if (!rt)
 545                return;
 546
 547        /* Push Tunnel header. */
 548        if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))
 549                goto err_free_rt;
 550
 551        flags = tun_info->key.tun_flags &
 552                (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
 553        gre_build_header(skb, tunnel_hlen, flags, proto,
 554                         tunnel_id_to_key32(tun_info->key.tun_id),
 555                         (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
 556
 557        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 558
 559        iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
 560                      key->tos, key->ttl, df, false);
 561        return;
 562
 563err_free_rt:
 564        ip_rt_put(rt);
 565err_free_skb:
 566        kfree_skb(skb);
 567        dev->stats.tx_dropped++;
 568}
 569
 570static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
 571                           __be16 proto)
 572{
 573        struct ip_tunnel *tunnel = netdev_priv(dev);
 574        struct ip_tunnel_info *tun_info;
 575        const struct ip_tunnel_key *key;
 576        struct erspan_metadata *md;
 577        struct rtable *rt = NULL;
 578        bool truncate = false;
 579        struct flowi4 fl;
 580        int tunnel_hlen;
 581        int version;
 582        __be16 df;
 583        int nhoff;
 584        int thoff;
 585
 586        tun_info = skb_tunnel_info(skb);
 587        if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
 588                     ip_tunnel_info_af(tun_info) != AF_INET))
 589                goto err_free_skb;
 590
 591        key = &tun_info->key;
 592        if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
 593                goto err_free_rt;
 594        md = ip_tunnel_info_opts(tun_info);
 595        if (!md)
 596                goto err_free_rt;
 597
 598        /* ERSPAN has fixed 8 byte GRE header */
 599        version = md->version;
 600        tunnel_hlen = 8 + erspan_hdr_len(version);
 601
 602        rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
 603        if (!rt)
 604                return;
 605
 606        if (gre_handle_offloads(skb, false))
 607                goto err_free_rt;
 608
 609        if (skb->len > dev->mtu + dev->hard_header_len) {
 610                pskb_trim(skb, dev->mtu + dev->hard_header_len);
 611                truncate = true;
 612        }
 613
 614        nhoff = skb_network_header(skb) - skb_mac_header(skb);
 615        if (skb->protocol == htons(ETH_P_IP) &&
 616            (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
 617                truncate = true;
 618
 619        thoff = skb_transport_header(skb) - skb_mac_header(skb);
 620        if (skb->protocol == htons(ETH_P_IPV6) &&
 621            (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
 622                truncate = true;
 623
 624        if (version == 1) {
 625                erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
 626                                    ntohl(md->u.index), truncate, true);
 627        } else if (version == 2) {
 628                erspan_build_header_v2(skb,
 629                                       ntohl(tunnel_id_to_key32(key->tun_id)),
 630                                       md->u.md2.dir,
 631                                       get_hwid(&md->u.md2),
 632                                       truncate, true);
 633        } else {
 634                goto err_free_rt;
 635        }
 636
 637        gre_build_header(skb, 8, TUNNEL_SEQ,
 638                         htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
 639
 640        df = key->tun_flags & TUNNEL_DONT_FRAGMENT ?  htons(IP_DF) : 0;
 641
 642        iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
 643                      key->tos, key->ttl, df, false);
 644        return;
 645
 646err_free_rt:
 647        ip_rt_put(rt);
 648err_free_skb:
 649        kfree_skb(skb);
 650        dev->stats.tx_dropped++;
 651}
 652
 653static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 654{
 655        struct ip_tunnel_info *info = skb_tunnel_info(skb);
 656        struct rtable *rt;
 657        struct flowi4 fl4;
 658
 659        if (ip_tunnel_info_af(info) != AF_INET)
 660                return -EINVAL;
 661
 662        rt = gre_get_rt(skb, dev, &fl4, &info->key);
 663        if (IS_ERR(rt))
 664                return PTR_ERR(rt);
 665
 666        ip_rt_put(rt);
 667        info->key.u.ipv4.src = fl4.saddr;
 668        return 0;
 669}
 670
 671static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 672                              struct net_device *dev)
 673{
 674        struct ip_tunnel *tunnel = netdev_priv(dev);
 675        const struct iphdr *tnl_params;
 676
 677        if (tunnel->collect_md) {
 678                gre_fb_xmit(skb, dev, skb->protocol);
 679                return NETDEV_TX_OK;
 680        }
 681
 682        if (dev->header_ops) {
 683                /* Need space for new headers */
 684                if (skb_cow_head(skb, dev->needed_headroom -
 685                                      (tunnel->hlen + sizeof(struct iphdr))))
 686                        goto free_skb;
 687
 688                tnl_params = (const struct iphdr *)skb->data;
 689
 690                /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
 691                 * to gre header.
 692                 */
 693                skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
 694                skb_reset_mac_header(skb);
 695        } else {
 696                if (skb_cow_head(skb, dev->needed_headroom))
 697                        goto free_skb;
 698
 699                tnl_params = &tunnel->parms.iph;
 700        }
 701
 702        if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 703                goto free_skb;
 704
 705        __gre_xmit(skb, dev, tnl_params, skb->protocol);
 706        return NETDEV_TX_OK;
 707
 708free_skb:
 709        kfree_skb(skb);
 710        dev->stats.tx_dropped++;
 711        return NETDEV_TX_OK;
 712}
 713
 714static netdev_tx_t erspan_xmit(struct sk_buff *skb,
 715                               struct net_device *dev)
 716{
 717        struct ip_tunnel *tunnel = netdev_priv(dev);
 718        bool truncate = false;
 719
 720        if (tunnel->collect_md) {
 721                erspan_fb_xmit(skb, dev, skb->protocol);
 722                return NETDEV_TX_OK;
 723        }
 724
 725        if (gre_handle_offloads(skb, false))
 726                goto free_skb;
 727
 728        if (skb_cow_head(skb, dev->needed_headroom))
 729                goto free_skb;
 730
 731        if (skb->len > dev->mtu + dev->hard_header_len) {
 732                pskb_trim(skb, dev->mtu + dev->hard_header_len);
 733                truncate = true;
 734        }
 735
 736        /* Push ERSPAN header */
 737        if (tunnel->erspan_ver == 1)
 738                erspan_build_header(skb, ntohl(tunnel->parms.o_key),
 739                                    tunnel->index,
 740                                    truncate, true);
 741        else if (tunnel->erspan_ver == 2)
 742                erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
 743                                       tunnel->dir, tunnel->hwid,
 744                                       truncate, true);
 745        else
 746                goto free_skb;
 747
 748        tunnel->parms.o_flags &= ~TUNNEL_KEY;
 749        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
 750        return NETDEV_TX_OK;
 751
 752free_skb:
 753        kfree_skb(skb);
 754        dev->stats.tx_dropped++;
 755        return NETDEV_TX_OK;
 756}
 757
 758static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
 759                                struct net_device *dev)
 760{
 761        struct ip_tunnel *tunnel = netdev_priv(dev);
 762
 763        if (tunnel->collect_md) {
 764                gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
 765                return NETDEV_TX_OK;
 766        }
 767
 768        if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
 769                goto free_skb;
 770
 771        if (skb_cow_head(skb, dev->needed_headroom))
 772                goto free_skb;
 773
 774        __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
 775        return NETDEV_TX_OK;
 776
 777free_skb:
 778        kfree_skb(skb);
 779        dev->stats.tx_dropped++;
 780        return NETDEV_TX_OK;
 781}
 782
 783static void ipgre_link_update(struct net_device *dev, bool set_mtu)
 784{
 785        struct ip_tunnel *tunnel = netdev_priv(dev);
 786        int len;
 787
 788        len = tunnel->tun_hlen;
 789        tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 790        len = tunnel->tun_hlen - len;
 791        tunnel->hlen = tunnel->hlen + len;
 792
 793        dev->needed_headroom = dev->needed_headroom + len;
 794        if (set_mtu)
 795                dev->mtu = max_t(int, dev->mtu - len, 68);
 796
 797        if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
 798                if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
 799                    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
 800                        dev->features |= NETIF_F_GSO_SOFTWARE;
 801                        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 802                } else {
 803                        dev->features &= ~NETIF_F_GSO_SOFTWARE;
 804                        dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 805                }
 806                dev->features |= NETIF_F_LLTX;
 807        } else {
 808                dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 809                dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
 810        }
 811}
 812
 813static int ipgre_tunnel_ioctl(struct net_device *dev,
 814                              struct ifreq *ifr, int cmd)
 815{
 816        struct ip_tunnel_parm p;
 817        int err;
 818
 819        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 820                return -EFAULT;
 821
 822        if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 823                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
 824                    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
 825                    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
 826                        return -EINVAL;
 827        }
 828
 829        p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
 830        p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
 831
 832        err = ip_tunnel_ioctl(dev, &p, cmd);
 833        if (err)
 834                return err;
 835
 836        if (cmd == SIOCCHGTUNNEL) {
 837                struct ip_tunnel *t = netdev_priv(dev);
 838
 839                t->parms.i_flags = p.i_flags;
 840                t->parms.o_flags = p.o_flags;
 841
 842                if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
 843                        ipgre_link_update(dev, true);
 844        }
 845
 846        p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
 847        p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
 848
 849        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 850                return -EFAULT;
 851
 852        return 0;
 853}
 854
 855/* Nice toy. Unfortunately, useless in real life :-)
 856   It allows to construct virtual multiprotocol broadcast "LAN"
 857   over the Internet, provided multicast routing is tuned.
 858
 859
 860   I have no idea was this bicycle invented before me,
 861   so that I had to set ARPHRD_IPGRE to a random value.
 862   I have an impression, that Cisco could make something similar,
 863   but this feature is apparently missing in IOS<=11.2(8).
 864
 865   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
 866   with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
 867
 868   ping -t 255 224.66.66.66
 869
 870   If nobody answers, mbone does not work.
 871
 872   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
 873   ip addr add 10.66.66.<somewhat>/24 dev Universe
 874   ifconfig Universe up
 875   ifconfig Universe add fe80::<Your_real_addr>/10
 876   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
 877   ftp 10.66.66.66
 878   ...
 879   ftp fec0:6666:6666::193.233.7.65
 880   ...
 881 */
 882static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
 883                        unsigned short type,
 884                        const void *daddr, const void *saddr, unsigned int len)
 885{
 886        struct ip_tunnel *t = netdev_priv(dev);
 887        struct iphdr *iph;
 888        struct gre_base_hdr *greh;
 889
 890        iph = skb_push(skb, t->hlen + sizeof(*iph));
 891        greh = (struct gre_base_hdr *)(iph+1);
 892        greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
 893        greh->protocol = htons(type);
 894
 895        memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
 896
 897        /* Set the source hardware address. */
 898        if (saddr)
 899                memcpy(&iph->saddr, saddr, 4);
 900        if (daddr)
 901                memcpy(&iph->daddr, daddr, 4);
 902        if (iph->daddr)
 903                return t->hlen + sizeof(*iph);
 904
 905        return -(t->hlen + sizeof(*iph));
 906}
 907
 908static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
 909{
 910        const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
 911        memcpy(haddr, &iph->saddr, 4);
 912        return 4;
 913}
 914
 915static const struct header_ops ipgre_header_ops = {
 916        .create = ipgre_header,
 917        .parse  = ipgre_header_parse,
 918};
 919
 920#ifdef CONFIG_NET_IPGRE_BROADCAST
 921static int ipgre_open(struct net_device *dev)
 922{
 923        struct ip_tunnel *t = netdev_priv(dev);
 924
 925        if (ipv4_is_multicast(t->parms.iph.daddr)) {
 926                struct flowi4 fl4;
 927                struct rtable *rt;
 928
 929                rt = ip_route_output_gre(t->net, &fl4,
 930                                         t->parms.iph.daddr,
 931                                         t->parms.iph.saddr,
 932                                         t->parms.o_key,
 933                                         RT_TOS(t->parms.iph.tos),
 934                                         t->parms.link);
 935                if (IS_ERR(rt))
 936                        return -EADDRNOTAVAIL;
 937                dev = rt->dst.dev;
 938                ip_rt_put(rt);
 939                if (!__in_dev_get_rtnl(dev))
 940                        return -EADDRNOTAVAIL;
 941                t->mlink = dev->ifindex;
 942                ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
 943        }
 944        return 0;
 945}
 946
 947static int ipgre_close(struct net_device *dev)
 948{
 949        struct ip_tunnel *t = netdev_priv(dev);
 950
 951        if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
 952                struct in_device *in_dev;
 953                in_dev = inetdev_by_index(t->net, t->mlink);
 954                if (in_dev)
 955                        ip_mc_dec_group(in_dev, t->parms.iph.daddr);
 956        }
 957        return 0;
 958}
 959#endif
 960
 961static const struct net_device_ops ipgre_netdev_ops = {
 962        .ndo_init               = ipgre_tunnel_init,
 963        .ndo_uninit             = ip_tunnel_uninit,
 964#ifdef CONFIG_NET_IPGRE_BROADCAST
 965        .ndo_open               = ipgre_open,
 966        .ndo_stop               = ipgre_close,
 967#endif
 968        .ndo_start_xmit         = ipgre_xmit,
 969        .ndo_do_ioctl           = ipgre_tunnel_ioctl,
 970        .ndo_change_mtu         = ip_tunnel_change_mtu,
 971        .ndo_get_stats64        = ip_tunnel_get_stats64,
 972        .ndo_get_iflink         = ip_tunnel_get_iflink,
 973};
 974
 975#define GRE_FEATURES (NETIF_F_SG |              \
 976                      NETIF_F_FRAGLIST |        \
 977                      NETIF_F_HIGHDMA |         \
 978                      NETIF_F_HW_CSUM)
 979
 980static void ipgre_tunnel_setup(struct net_device *dev)
 981{
 982        dev->netdev_ops         = &ipgre_netdev_ops;
 983        dev->type               = ARPHRD_IPGRE;
 984        ip_tunnel_setup(dev, ipgre_net_id);
 985}
 986
 987static void __gre_tunnel_init(struct net_device *dev)
 988{
 989        struct ip_tunnel *tunnel;
 990
 991        tunnel = netdev_priv(dev);
 992        tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
 993        tunnel->parms.iph.protocol = IPPROTO_GRE;
 994
 995        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 996
 997        dev->features           |= GRE_FEATURES;
 998        dev->hw_features        |= GRE_FEATURES;
 999
1000        if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
1001                /* TCP offload with GRE SEQ is not supported, nor
1002                 * can we support 2 levels of outer headers requiring
1003                 * an update.
1004                 */
1005                if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
1006                    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
1007                        dev->features    |= NETIF_F_GSO_SOFTWARE;
1008                        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
1009                }
1010
1011                /* Can use a lockless transmit, unless we generate
1012                 * output sequences
1013                 */
1014                dev->features |= NETIF_F_LLTX;
1015        }
1016}
1017
1018static int ipgre_tunnel_init(struct net_device *dev)
1019{
1020        struct ip_tunnel *tunnel = netdev_priv(dev);
1021        struct iphdr *iph = &tunnel->parms.iph;
1022
1023        __gre_tunnel_init(dev);
1024
1025        memcpy(dev->dev_addr, &iph->saddr, 4);
1026        memcpy(dev->broadcast, &iph->daddr, 4);
1027
1028        dev->flags              = IFF_NOARP;
1029        netif_keep_dst(dev);
1030        dev->addr_len           = 4;
1031
1032        if (iph->daddr && !tunnel->collect_md) {
1033#ifdef CONFIG_NET_IPGRE_BROADCAST
1034                if (ipv4_is_multicast(iph->daddr)) {
1035                        if (!iph->saddr)
1036                                return -EINVAL;
1037                        dev->flags = IFF_BROADCAST;
1038                        dev->header_ops = &ipgre_header_ops;
1039                }
1040#endif
1041        } else if (!tunnel->collect_md) {
1042                dev->header_ops = &ipgre_header_ops;
1043        }
1044
1045        return ip_tunnel_init(dev);
1046}
1047
1048static const struct gre_protocol ipgre_protocol = {
1049        .handler     = gre_rcv,
1050        .err_handler = gre_err,
1051};
1052
1053static int __net_init ipgre_init_net(struct net *net)
1054{
1055        return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
1056}
1057
1058static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
1059{
1060        ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
1061}
1062
1063static struct pernet_operations ipgre_net_ops = {
1064        .init = ipgre_init_net,
1065        .exit_batch = ipgre_exit_batch_net,
1066        .id   = &ipgre_net_id,
1067        .size = sizeof(struct ip_tunnel_net),
1068};
1069
1070static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
1071                                 struct netlink_ext_ack *extack)
1072{
1073        __be16 flags;
1074
1075        if (!data)
1076                return 0;
1077
1078        flags = 0;
1079        if (data[IFLA_GRE_IFLAGS])
1080                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1081        if (data[IFLA_GRE_OFLAGS])
1082                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1083        if (flags & (GRE_VERSION|GRE_ROUTING))
1084                return -EINVAL;
1085
1086        if (data[IFLA_GRE_COLLECT_METADATA] &&
1087            data[IFLA_GRE_ENCAP_TYPE] &&
1088            nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
1089                return -EINVAL;
1090
1091        return 0;
1092}
1093
1094static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
1095                              struct netlink_ext_ack *extack)
1096{
1097        __be32 daddr;
1098
1099        if (tb[IFLA_ADDRESS]) {
1100                if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1101                        return -EINVAL;
1102                if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1103                        return -EADDRNOTAVAIL;
1104        }
1105
1106        if (!data)
1107                goto out;
1108
1109        if (data[IFLA_GRE_REMOTE]) {
1110                memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1111                if (!daddr)
1112                        return -EINVAL;
1113        }
1114
1115out:
1116        return ipgre_tunnel_validate(tb, data, extack);
1117}
1118
1119static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
1120                           struct netlink_ext_ack *extack)
1121{
1122        __be16 flags = 0;
1123        int ret;
1124
1125        if (!data)
1126                return 0;
1127
1128        ret = ipgre_tap_validate(tb, data, extack);
1129        if (ret)
1130                return ret;
1131
1132        /* ERSPAN should only have GRE sequence and key flag */
1133        if (data[IFLA_GRE_OFLAGS])
1134                flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1135        if (data[IFLA_GRE_IFLAGS])
1136                flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1137        if (!data[IFLA_GRE_COLLECT_METADATA] &&
1138            flags != (GRE_SEQ | GRE_KEY))
1139                return -EINVAL;
1140
1141        /* ERSPAN Session ID only has 10-bit. Since we reuse
1142         * 32-bit key field as ID, check it's range.
1143         */
1144        if (data[IFLA_GRE_IKEY] &&
1145            (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
1146                return -EINVAL;
1147
1148        if (data[IFLA_GRE_OKEY] &&
1149            (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
1150                return -EINVAL;
1151
1152        return 0;
1153}
1154
1155static int ipgre_netlink_parms(struct net_device *dev,
1156                                struct nlattr *data[],
1157                                struct nlattr *tb[],
1158                                struct ip_tunnel_parm *parms,
1159                                __u32 *fwmark)
1160{
1161        struct ip_tunnel *t = netdev_priv(dev);
1162
1163        memset(parms, 0, sizeof(*parms));
1164
1165        parms->iph.protocol = IPPROTO_GRE;
1166
1167        if (!data)
1168                return 0;
1169
1170        if (data[IFLA_GRE_LINK])
1171                parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1172
1173        if (data[IFLA_GRE_IFLAGS])
1174                parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
1175
1176        if (data[IFLA_GRE_OFLAGS])
1177                parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
1178
1179        if (data[IFLA_GRE_IKEY])
1180                parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1181
1182        if (data[IFLA_GRE_OKEY])
1183                parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1184
1185        if (data[IFLA_GRE_LOCAL])
1186                parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
1187
1188        if (data[IFLA_GRE_REMOTE])
1189                parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
1190
1191        if (data[IFLA_GRE_TTL])
1192                parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1193
1194        if (data[IFLA_GRE_TOS])
1195                parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1196
1197        if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
1198                if (t->ignore_df)
1199                        return -EINVAL;
1200                parms->iph.frag_off = htons(IP_DF);
1201        }
1202
1203        if (data[IFLA_GRE_COLLECT_METADATA]) {
1204                t->collect_md = true;
1205                if (dev->type == ARPHRD_IPGRE)
1206                        dev->type = ARPHRD_NONE;
1207        }
1208
1209        if (data[IFLA_GRE_IGNORE_DF]) {
1210                if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
1211                  && (parms->iph.frag_off & htons(IP_DF)))
1212                        return -EINVAL;
1213                t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
1214        }
1215
1216        if (data[IFLA_GRE_FWMARK])
1217                *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
1218
1219        if (data[IFLA_GRE_ERSPAN_VER]) {
1220                t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
1221
1222                if (t->erspan_ver != 1 && t->erspan_ver != 2)
1223                        return -EINVAL;
1224        }
1225
1226        if (t->erspan_ver == 1) {
1227                if (data[IFLA_GRE_ERSPAN_INDEX]) {
1228                        t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
1229                        if (t->index & ~INDEX_MASK)
1230                                return -EINVAL;
1231                }
1232        } else if (t->erspan_ver == 2) {
1233                if (data[IFLA_GRE_ERSPAN_DIR]) {
1234                        t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
1235                        if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
1236                                return -EINVAL;
1237                }
1238                if (data[IFLA_GRE_ERSPAN_HWID]) {
1239                        t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
1240                        if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
1241                                return -EINVAL;
1242                }
1243        }
1244
1245        return 0;
1246}
1247
1248/* This function returns true when ENCAP attributes are present in the nl msg */
1249static bool ipgre_netlink_encap_parms(struct nlattr *data[],
1250                                      struct ip_tunnel_encap *ipencap)
1251{
1252        bool ret = false;
1253
1254        memset(ipencap, 0, sizeof(*ipencap));
1255
1256        if (!data)
1257                return ret;
1258
1259        if (data[IFLA_GRE_ENCAP_TYPE]) {
1260                ret = true;
1261                ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
1262        }
1263
1264        if (data[IFLA_GRE_ENCAP_FLAGS]) {
1265                ret = true;
1266                ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
1267        }
1268
1269        if (data[IFLA_GRE_ENCAP_SPORT]) {
1270                ret = true;
1271                ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
1272        }
1273
1274        if (data[IFLA_GRE_ENCAP_DPORT]) {
1275                ret = true;
1276                ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
1277        }
1278
1279        return ret;
1280}
1281
1282static int gre_tap_init(struct net_device *dev)
1283{
1284        __gre_tunnel_init(dev);
1285        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1286        netif_keep_dst(dev);
1287
1288        return ip_tunnel_init(dev);
1289}
1290
1291static const struct net_device_ops gre_tap_netdev_ops = {
1292        .ndo_init               = gre_tap_init,
1293        .ndo_uninit             = ip_tunnel_uninit,
1294        .ndo_start_xmit         = gre_tap_xmit,
1295        .ndo_set_mac_address    = eth_mac_addr,
1296        .ndo_validate_addr      = eth_validate_addr,
1297        .ndo_change_mtu         = ip_tunnel_change_mtu,
1298        .ndo_get_stats64        = ip_tunnel_get_stats64,
1299        .ndo_get_iflink         = ip_tunnel_get_iflink,
1300        .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1301};
1302
1303static int erspan_tunnel_init(struct net_device *dev)
1304{
1305        struct ip_tunnel *tunnel = netdev_priv(dev);
1306
1307        tunnel->tun_hlen = 8;
1308        tunnel->parms.iph.protocol = IPPROTO_GRE;
1309        tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
1310                       erspan_hdr_len(tunnel->erspan_ver);
1311
1312        dev->features           |= GRE_FEATURES;
1313        dev->hw_features        |= GRE_FEATURES;
1314        dev->priv_flags         |= IFF_LIVE_ADDR_CHANGE;
1315        netif_keep_dst(dev);
1316
1317        return ip_tunnel_init(dev);
1318}
1319
1320static const struct net_device_ops erspan_netdev_ops = {
1321        .ndo_init               = erspan_tunnel_init,
1322        .ndo_uninit             = ip_tunnel_uninit,
1323        .ndo_start_xmit         = erspan_xmit,
1324        .ndo_set_mac_address    = eth_mac_addr,
1325        .ndo_validate_addr      = eth_validate_addr,
1326        .ndo_change_mtu         = ip_tunnel_change_mtu,
1327        .ndo_get_stats64        = ip_tunnel_get_stats64,
1328        .ndo_get_iflink         = ip_tunnel_get_iflink,
1329        .ndo_fill_metadata_dst  = gre_fill_metadata_dst,
1330};
1331
1332static void ipgre_tap_setup(struct net_device *dev)
1333{
1334        ether_setup(dev);
1335        dev->max_mtu = 0;
1336        dev->netdev_ops = &gre_tap_netdev_ops;
1337        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1338        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1339        ip_tunnel_setup(dev, gre_tap_net_id);
1340}
1341
1342bool is_gretap_dev(const struct net_device *dev)
1343{
1344        return dev->netdev_ops == &gre_tap_netdev_ops;
1345}
1346EXPORT_SYMBOL_GPL(is_gretap_dev);
1347
1348static int ipgre_newlink(struct net *src_net, struct net_device *dev,
1349                         struct nlattr *tb[], struct nlattr *data[],
1350                         struct netlink_ext_ack *extack)
1351{
1352        struct ip_tunnel_parm p;
1353        struct ip_tunnel_encap ipencap;
1354        __u32 fwmark = 0;
1355        int err;
1356
1357        if (ipgre_netlink_encap_parms(data, &ipencap)) {
1358                struct ip_tunnel *t = netdev_priv(dev);
1359                err = ip_tunnel_encap_setup(t, &ipencap);
1360
1361                if (err < 0)
1362                        return err;
1363        }
1364
1365        err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1366        if (err < 0)
1367                return err;
1368        return ip_tunnel_newlink(dev, tb, &p, fwmark);
1369}
1370
1371static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1372                            struct nlattr *data[],
1373                            struct netlink_ext_ack *extack)
1374{
1375        struct ip_tunnel *t = netdev_priv(dev);
1376        struct ip_tunnel_encap ipencap;
1377        __u32 fwmark = t->fwmark;
1378        struct ip_tunnel_parm p;
1379        int err;
1380
1381        if (ipgre_netlink_encap_parms(data, &ipencap)) {
1382                err = ip_tunnel_encap_setup(t, &ipencap);
1383
1384                if (err < 0)
1385                        return err;
1386        }
1387
1388        err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
1389        if (err < 0)
1390                return err;
1391
1392        err = ip_tunnel_changelink(dev, tb, &p, fwmark);
1393        if (err < 0)
1394                return err;
1395
1396        t->parms.i_flags = p.i_flags;
1397        t->parms.o_flags = p.o_flags;
1398
1399        if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
1400                ipgre_link_update(dev, !tb[IFLA_MTU]);
1401
1402        return 0;
1403}
1404
1405static size_t ipgre_get_size(const struct net_device *dev)
1406{
1407        return
1408                /* IFLA_GRE_LINK */
1409                nla_total_size(4) +
1410                /* IFLA_GRE_IFLAGS */
1411                nla_total_size(2) +
1412                /* IFLA_GRE_OFLAGS */
1413                nla_total_size(2) +
1414                /* IFLA_GRE_IKEY */
1415                nla_total_size(4) +
1416                /* IFLA_GRE_OKEY */
1417                nla_total_size(4) +
1418                /* IFLA_GRE_LOCAL */
1419                nla_total_size(4) +
1420                /* IFLA_GRE_REMOTE */
1421                nla_total_size(4) +
1422                /* IFLA_GRE_TTL */
1423                nla_total_size(1) +
1424                /* IFLA_GRE_TOS */
1425                nla_total_size(1) +
1426                /* IFLA_GRE_PMTUDISC */
1427                nla_total_size(1) +
1428                /* IFLA_GRE_ENCAP_TYPE */
1429                nla_total_size(2) +
1430                /* IFLA_GRE_ENCAP_FLAGS */
1431                nla_total_size(2) +
1432                /* IFLA_GRE_ENCAP_SPORT */
1433                nla_total_size(2) +
1434                /* IFLA_GRE_ENCAP_DPORT */
1435                nla_total_size(2) +
1436                /* IFLA_GRE_COLLECT_METADATA */
1437                nla_total_size(0) +
1438                /* IFLA_GRE_IGNORE_DF */
1439                nla_total_size(1) +
1440                /* IFLA_GRE_FWMARK */
1441                nla_total_size(4) +
1442                /* IFLA_GRE_ERSPAN_INDEX */
1443                nla_total_size(4) +
1444                /* IFLA_GRE_ERSPAN_VER */
1445                nla_total_size(1) +
1446                /* IFLA_GRE_ERSPAN_DIR */
1447                nla_total_size(1) +
1448                /* IFLA_GRE_ERSPAN_HWID */
1449                nla_total_size(2) +
1450                0;
1451}
1452
1453static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1454{
1455        struct ip_tunnel *t = netdev_priv(dev);
1456        struct ip_tunnel_parm *p = &t->parms;
1457
1458        if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
1459            nla_put_be16(skb, IFLA_GRE_IFLAGS,
1460                         gre_tnl_flags_to_gre_flags(p->i_flags)) ||
1461            nla_put_be16(skb, IFLA_GRE_OFLAGS,
1462                         gre_tnl_flags_to_gre_flags(p->o_flags)) ||
1463            nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
1464            nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
1465            nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
1466            nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
1467            nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
1468            nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
1469            nla_put_u8(skb, IFLA_GRE_PMTUDISC,
1470                       !!(p->iph.frag_off & htons(IP_DF))) ||
1471            nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
1472                goto nla_put_failure;
1473
1474        if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
1475                        t->encap.type) ||
1476            nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
1477                         t->encap.sport) ||
1478            nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
1479                         t->encap.dport) ||
1480            nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
1481                        t->encap.flags))
1482                goto nla_put_failure;
1483
1484        if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
1485                goto nla_put_failure;
1486
1487        if (t->collect_md) {
1488                if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
1489                        goto nla_put_failure;
1490        }
1491
1492        if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
1493                goto nla_put_failure;
1494
1495        if (t->erspan_ver == 1) {
1496                if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
1497                        goto nla_put_failure;
1498        } else if (t->erspan_ver == 2) {
1499                if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
1500                        goto nla_put_failure;
1501                if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
1502                        goto nla_put_failure;
1503        }
1504
1505        return 0;
1506
1507nla_put_failure:
1508        return -EMSGSIZE;
1509}
1510
1511static void erspan_setup(struct net_device *dev)
1512{
1513        struct ip_tunnel *t = netdev_priv(dev);
1514
1515        ether_setup(dev);
1516        dev->netdev_ops = &erspan_netdev_ops;
1517        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1518        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1519        ip_tunnel_setup(dev, erspan_net_id);
1520        t->erspan_ver = 1;
1521}
1522
1523static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1524        [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1525        [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1526        [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1527        [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1528        [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1529        [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1530        [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1531        [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1532        [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1533        [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1534        [IFLA_GRE_ENCAP_TYPE]   = { .type = NLA_U16 },
1535        [IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
1536        [IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
1537        [IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
1538        [IFLA_GRE_COLLECT_METADATA]     = { .type = NLA_FLAG },
1539        [IFLA_GRE_IGNORE_DF]    = { .type = NLA_U8 },
1540        [IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
1541        [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
1542        [IFLA_GRE_ERSPAN_VER]   = { .type = NLA_U8 },
1543        [IFLA_GRE_ERSPAN_DIR]   = { .type = NLA_U8 },
1544        [IFLA_GRE_ERSPAN_HWID]  = { .type = NLA_U16 },
1545};
1546
1547static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1548        .kind           = "gre",
1549        .maxtype        = IFLA_GRE_MAX,
1550        .policy         = ipgre_policy,
1551        .priv_size      = sizeof(struct ip_tunnel),
1552        .setup          = ipgre_tunnel_setup,
1553        .validate       = ipgre_tunnel_validate,
1554        .newlink        = ipgre_newlink,
1555        .changelink     = ipgre_changelink,
1556        .dellink        = ip_tunnel_dellink,
1557        .get_size       = ipgre_get_size,
1558        .fill_info      = ipgre_fill_info,
1559        .get_link_net   = ip_tunnel_get_link_net,
1560};
1561
1562static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1563        .kind           = "gretap",
1564        .maxtype        = IFLA_GRE_MAX,
1565        .policy         = ipgre_policy,
1566        .priv_size      = sizeof(struct ip_tunnel),
1567        .setup          = ipgre_tap_setup,
1568        .validate       = ipgre_tap_validate,
1569        .newlink        = ipgre_newlink,
1570        .changelink     = ipgre_changelink,
1571        .dellink        = ip_tunnel_dellink,
1572        .get_size       = ipgre_get_size,
1573        .fill_info      = ipgre_fill_info,
1574        .get_link_net   = ip_tunnel_get_link_net,
1575};
1576
1577static struct rtnl_link_ops erspan_link_ops __read_mostly = {
1578        .kind           = "erspan",
1579        .maxtype        = IFLA_GRE_MAX,
1580        .policy         = ipgre_policy,
1581        .priv_size      = sizeof(struct ip_tunnel),
1582        .setup          = erspan_setup,
1583        .validate       = erspan_validate,
1584        .newlink        = ipgre_newlink,
1585        .changelink     = ipgre_changelink,
1586        .dellink        = ip_tunnel_dellink,
1587        .get_size       = ipgre_get_size,
1588        .fill_info      = ipgre_fill_info,
1589        .get_link_net   = ip_tunnel_get_link_net,
1590};
1591
1592struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
1593                                        u8 name_assign_type)
1594{
1595        struct nlattr *tb[IFLA_MAX + 1];
1596        struct net_device *dev;
1597        LIST_HEAD(list_kill);
1598        struct ip_tunnel *t;
1599        int err;
1600
1601        memset(&tb, 0, sizeof(tb));
1602
1603        dev = rtnl_create_link(net, name, name_assign_type,
1604                               &ipgre_tap_ops, tb);
1605        if (IS_ERR(dev))
1606                return dev;
1607
1608        /* Configure flow based GRE device. */
1609        t = netdev_priv(dev);
1610        t->collect_md = true;
1611
1612        err = ipgre_newlink(net, dev, tb, NULL, NULL);
1613        if (err < 0) {
1614                free_netdev(dev);
1615                return ERR_PTR(err);
1616        }
1617
1618        /* openvswitch users expect packet sizes to be unrestricted,
1619         * so set the largest MTU we can.
1620         */
1621        err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
1622        if (err)
1623                goto out;
1624
1625        err = rtnl_configure_link(dev, NULL);
1626        if (err < 0)
1627                goto out;
1628
1629        return dev;
1630out:
1631        ip_tunnel_dellink(dev, &list_kill);
1632        unregister_netdevice_many(&list_kill);
1633        return ERR_PTR(err);
1634}
1635EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
1636
1637static int __net_init ipgre_tap_init_net(struct net *net)
1638{
1639        return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
1640}
1641
1642static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
1643{
1644        ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
1645}
1646
1647static struct pernet_operations ipgre_tap_net_ops = {
1648        .init = ipgre_tap_init_net,
1649        .exit_batch = ipgre_tap_exit_batch_net,
1650        .id   = &gre_tap_net_id,
1651        .size = sizeof(struct ip_tunnel_net),
1652};
1653
1654static int __net_init erspan_init_net(struct net *net)
1655{
1656        return ip_tunnel_init_net(net, erspan_net_id,
1657                                  &erspan_link_ops, "erspan0");
1658}
1659
1660static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
1661{
1662        ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
1663}
1664
1665static struct pernet_operations erspan_net_ops = {
1666        .init = erspan_init_net,
1667        .exit_batch = erspan_exit_batch_net,
1668        .id   = &erspan_net_id,
1669        .size = sizeof(struct ip_tunnel_net),
1670};
1671
1672static int __init ipgre_init(void)
1673{
1674        int err;
1675
1676        pr_info("GRE over IPv4 tunneling driver\n");
1677
1678        err = register_pernet_device(&ipgre_net_ops);
1679        if (err < 0)
1680                return err;
1681
1682        err = register_pernet_device(&ipgre_tap_net_ops);
1683        if (err < 0)
1684                goto pnet_tap_failed;
1685
1686        err = register_pernet_device(&erspan_net_ops);
1687        if (err < 0)
1688                goto pnet_erspan_failed;
1689
1690        err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1691        if (err < 0) {
1692                pr_info("%s: can't add protocol\n", __func__);
1693                goto add_proto_failed;
1694        }
1695
1696        err = rtnl_link_register(&ipgre_link_ops);
1697        if (err < 0)
1698                goto rtnl_link_failed;
1699
1700        err = rtnl_link_register(&ipgre_tap_ops);
1701        if (err < 0)
1702                goto tap_ops_failed;
1703
1704        err = rtnl_link_register(&erspan_link_ops);
1705        if (err < 0)
1706                goto erspan_link_failed;
1707
1708        return 0;
1709
1710erspan_link_failed:
1711        rtnl_link_unregister(&ipgre_tap_ops);
1712tap_ops_failed:
1713        rtnl_link_unregister(&ipgre_link_ops);
1714rtnl_link_failed:
1715        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1716add_proto_failed:
1717        unregister_pernet_device(&erspan_net_ops);
1718pnet_erspan_failed:
1719        unregister_pernet_device(&ipgre_tap_net_ops);
1720pnet_tap_failed:
1721        unregister_pernet_device(&ipgre_net_ops);
1722        return err;
1723}
1724
1725static void __exit ipgre_fini(void)
1726{
1727        rtnl_link_unregister(&ipgre_tap_ops);
1728        rtnl_link_unregister(&ipgre_link_ops);
1729        rtnl_link_unregister(&erspan_link_ops);
1730        gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1731        unregister_pernet_device(&ipgre_tap_net_ops);
1732        unregister_pernet_device(&ipgre_net_ops);
1733        unregister_pernet_device(&erspan_net_ops);
1734}
1735
1736module_init(ipgre_init);
1737module_exit(ipgre_fini);
1738MODULE_LICENSE("GPL");
1739MODULE_ALIAS_RTNL_LINK("gre");
1740MODULE_ALIAS_RTNL_LINK("gretap");
1741MODULE_ALIAS_RTNL_LINK("erspan");
1742MODULE_ALIAS_NETDEV("gre0");
1743MODULE_ALIAS_NETDEV("gretap0");
1744MODULE_ALIAS_NETDEV("erspan0");
1745